2626import time
2727import uuid
2828import warnings
29+ from collections import OrderedDict
2930from configparser import NoSectionError
3031from contextlib import contextmanager
3132from pathlib import Path
3637import patoolib
3738import requests
3839from git import GitCommandError , GitError , Repo
40+ from wcmatch import glob
3941
4042from renku .core import errors
4143from renku .core .management .clone import clone
@@ -520,33 +522,37 @@ def _add_from_git(self, dataset, url, sources, destination, ref):
520522
521523 # Get all files from repo that match sources
522524 repo , repo_path = self .prepare_git_repo (url , ref )
523- copied_sources = set ()
524525 files = set ()
526+ used_sources = set ()
525527 for file in repo .head .commit .tree .traverse ():
526528 path = file .path
527529 result = self ._get_src_and_dst (
528- path , repo_path , sources , destination
530+ path , repo_path , sources , destination , used_sources
529531 )
530532
531533 if result :
532534 files .add (result )
533- source = result [3 ]
534- copied_sources .add (source )
535535
536- uncopied_sources = sources - copied_sources
537- if uncopied_sources :
538- uncopied_sources = {str (s ) for s in uncopied_sources }
536+ unused_sources = set ( sources . keys ()) - used_sources
537+ if unused_sources :
538+ unused_sources = {str (s ) for s in unused_sources }
539539 raise errors .ParameterError (
540- 'No such file or directory' , param_hint = uncopied_sources
540+ 'No such file or directory' , param_hint = unused_sources
541541 )
542542
543+ if destination .exists () and not destination .is_dir ():
544+ if len (files ) > 1 :
545+ raise errors .ParameterError (
546+ 'Cannot copy multiple files or directories to a file'
547+ )
548+
543549 # Create metadata and move files to dataset
544550 results = []
545551 remote_client = LocalClient (repo_path )
546552
547553 # Pull files from LFS
548554 paths = set ()
549- for path , src , _ , __ in files :
555+ for path , src , _ in files :
550556 if src .is_dir ():
551557 continue
552558 if src .is_symlink ():
@@ -561,7 +567,7 @@ def _add_from_git(self, dataset, url, sources, destination, ref):
561567 paths = {f [0 ] for f in files }
562568 metadata = self ._fetch_files_metadata (remote_client , paths )
563569
564- for path , src , dst , _ in files :
570+ for path , src , dst in files :
565571 if not src .is_dir ():
566572 # Use original metadata if it exists
567573 based_on = metadata .get (path )
@@ -605,7 +611,11 @@ def _check_overwrite(self, files, force):
605611
606612 def _resolve_paths (self , root_path , paths ):
607613 """Check if paths are within a root path and resolve them."""
608- return {self ._resolve_path (root_path , p ) for p in paths }
614+ result = OrderedDict () # Used as an ordered-set
615+ for path in paths :
616+ r = self ._resolve_path (root_path , path )
617+ result [r ] = None
618+ return result
609619
610620 def _resolve_path (self , root_path , path ):
611621 """Check if a path is within a root path and resolve it."""
@@ -618,18 +628,27 @@ def _resolve_path(self, root_path, path):
618628 'File {} is not within path {}' .format (path , root_path )
619629 )
620630
621- def _get_src_and_dst (self , path , repo_path , sources , dst_root ):
631+ def _get_src_and_dst (
632+ self , path , repo_path , sources , dst_root , used_sources
633+ ):
634+ is_wildcard = False
635+
622636 if not sources :
623637 source = Path ('.' )
624638 else :
625639 source = None
626- for s in sources :
640+ for s in sources . keys () :
627641 try :
628642 Path (path ).relative_to (s )
629643 except ValueError :
630- pass
644+ if glob .globmatch (path , str (s ), flags = glob .GLOBSTAR ):
645+ is_wildcard = True
646+ source = path
647+ used_sources .add (s )
648+ break
631649 else :
632650 source = s
651+ used_sources .add (source )
633652 break
634653
635654 if not source :
@@ -639,24 +658,26 @@ def _get_src_and_dst(self, path, repo_path, sources, dst_root):
639658 source_name = Path (source ).name
640659 relative_path = Path (path ).relative_to (source )
641660
642- if not dst_root .exists ():
643- if len (sources ) == 1 :
661+ if src .is_dir () and is_wildcard :
662+ sources [source ] = None
663+ used_sources .add (source )
664+
665+ if not dst_root .exists (): # Destination will be a file or directory
666+ if len (sources ) == 1 and not is_wildcard :
644667 dst = dst_root / relative_path
645668 else : # Treat destination as a directory
646669 dst = dst_root / source_name / relative_path
647670 elif dst_root .is_dir ():
648671 dst = dst_root / source_name / relative_path
649672 else : # Destination is an existing file
650- if len (sources ) == 1 and not src .is_dir ():
651- dst = dst_root
652- elif not sources :
653- raise errors .ParameterError ('Cannot copy repo to file' )
654- else :
673+ if src .is_dir ():
655674 raise errors .ParameterError (
656675 'Cannot copy multiple files or directories to a file'
657676 )
677+ # Later we need to check if we are copying multiple files
678+ dst = dst_root
658679
659- return (path , src , dst , source )
680+ return (path , src , dst )
660681
661682 def _fetch_lfs_files (self , repo_path , paths ):
662683 """Fetch and checkout paths that are tracked by Git LFS."""
0 commit comments