diff --git a/.changelog/5185.yml b/.changelog/5185.yml new file mode 100644 index 00000000000..3edf926e10c --- /dev/null +++ b/.changelog/5185.yml @@ -0,0 +1,4 @@ +changes: +- description: Improved handling private repositories. + type: feature +pr_number: 5185 diff --git a/demisto_sdk/commands/common/git_util.py b/demisto_sdk/commands/common/git_util.py index f5ce611d10b..0fcfafeee0f 100644 --- a/demisto_sdk/commands/common/git_util.py +++ b/demisto_sdk/commands/common/git_util.py @@ -23,6 +23,7 @@ PACKS_FOLDER, ) from demisto_sdk.commands.common.logger import logger +from demisto_sdk.commands.common.string_to_bool import string_to_bool class CommitOrBranchNotFoundError(GitError): @@ -538,7 +539,7 @@ def deleted_files( committed = set() if not staged_only: - # get all committed files identified as added which are changed from prev_ver. + # get all committed files identified as deleted which are changed from prev_ver. # this can result in extra files identified which were not touched on this branch. if remote: committed = { @@ -559,9 +560,22 @@ def deleted_files( } # identify all files that were touched on this branch regardless of status - # intersect these with all the committed files to identify the committed added files. - all_branch_changed_files = self._get_all_changed_files(prev_ver) - committed = committed.intersection(all_branch_changed_files) + # intersect these with all the committed files to identify the committed deleted files. + # EXCEPT in private repo mode - we want to catch ALL deletions, not just those in changed files + is_private_repo = string_to_bool( + os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False + ) + + if not is_private_repo: + all_branch_changed_files = self._get_all_changed_files(prev_ver) + committed = committed.intersection(all_branch_changed_files) + else: + # Filter out .gitkeep files and files under AssetsModelingRules + committed = { + f + for f in committed + if f.name != ".gitkeep" and "AssetsModelingRules" not in str(f) + } if committed_only: return committed @@ -571,7 +585,7 @@ def deleted_files( # get all untracked deleted files untracked = self._get_untracked_files("D") - # get all the files that are staged on the branch and identified as added. + # get all the files that are staged on the branch and identified as deleted. staged = { Path(os.path.join(item.a_path)) # type: ignore for item in self.repo.head.commit.diff().iter_change_type("D") @@ -696,11 +710,32 @@ def renamed_files( return all_renamed_files def get_all_changed_pack_ids(self, prev_ver: str) -> Set[str]: - return { - file.parts[1] - for file in self._get_all_changed_files(prev_ver) | self._get_staged_files() - if file.parts[0] == PACKS_FOLDER + # Handle case where prev_ver might be a boolean or invalid value + # If prev_ver is True/False or not a string, use empty string (will use default branch) + if not isinstance(prev_ver, str) or prev_ver in ("True", "False"): + prev_ver = "" + + # In private repos, ignore the graph's old commit and use actual branch changes + # This prevents updating all packs that diverged since the old graph commit + is_private_repo = string_to_bool( + os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False + ) + + if is_private_repo: + # Get only files changed in the current branch, not all diverged files + changed_files = ( + self.get_all_changed_files(prev_ver="", committed_only=True) + | self._get_staged_files() + ) + else: + changed_files = ( + self._get_all_changed_files(prev_ver) | self._get_staged_files() + ) + + pack_ids = { + file.parts[1] for file in changed_files if file.parts[0] == PACKS_FOLDER } + return pack_ids def _get_untracked_files(self, requested_status: str) -> set: """return all untracked files of the given requested status. @@ -737,11 +772,12 @@ def _get_staged_files(self) -> Set[Path]: Returns: Set[Path]: The staged files to return """ - return { + staged_files = { Path(item) for item in self.repo.git.diff("--cached", "--name-only").split("\n") if item } + return staged_files def _get_all_changed_files(self, prev_ver: Optional[str] = None) -> Set[Path]: """ @@ -753,13 +789,12 @@ def _get_all_changed_files(self, prev_ver: Optional[str] = None) -> Set[Path]: Returns: Set[Path]: of Paths to files changed in the current branch. """ - self.fetch() remote, branch = self.handle_prev_ver(prev_ver) current_hash = self.get_current_commit_hash() if remote: - return { + changed_files = { Path(os.path.join(item)) for item in self.repo.git.diff( "--name-only", f"{remote}/{branch}...{current_hash}" @@ -767,15 +802,35 @@ def _get_all_changed_files(self, prev_ver: Optional[str] = None) -> Set[Path]: if item } - # if remote does not exist we are checking against the commit sha1 + # if remote does not exist we are checking against the commit sha1 or branch else: - return { + is_private_repo = string_to_bool( + os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False + ) + + # In private repos, always use merge-base to find common ancestor + # This prevents including all diverged commits + if is_private_repo: + try: + merge_base = self.repo.git.merge_base(branch, current_hash).strip() + compare_from = merge_base + diff_operator = ".." + except Exception: + compare_from = branch + diff_operator = ".." + else: + # For non-private repos, use three-dot diff + diff_operator = "..." + compare_from = branch + + changed_files = { Path(os.path.join(item)) for item in self.repo.git.diff( - "--name-only", f"{branch}...{current_hash}" + "--name-only", f"{compare_from}{diff_operator}{current_hash}" ).split("\n") if item } + return changed_files def _only_last_commit( self, prev_ver: str, requested_status: Lit_change_type @@ -847,6 +902,10 @@ def find_primary_branch(repo: Repo) -> str: return "" def handle_prev_ver(self, prev_ver: Optional[str] = None): + if string_to_bool( + os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False + ): + return "", prev_ver or "master" # check for sha1 in regex sha1_pattern = re.compile(r"\b[0-9a-f]{40}\b", flags=re.IGNORECASE) if prev_ver and sha1_pattern.match(prev_ver): diff --git a/demisto_sdk/commands/content_graph/parsers/modeling_rule.py b/demisto_sdk/commands/content_graph/parsers/modeling_rule.py index 0a1c24fd82c..f76dab80ebe 100644 --- a/demisto_sdk/commands/content_graph/parsers/modeling_rule.py +++ b/demisto_sdk/commands/content_graph/parsers/modeling_rule.py @@ -70,6 +70,9 @@ def validate_structure(self) -> List[StructureError]: directory_pydantic_error = [] directory = self.path if self.path.is_dir() else self.path.parent for file in directory.iterdir(): + # Skip testdata.json files as they have a different structure + if file.name.endswith("testdata.json"): + continue try: if file.suffix == ".yml": StrictModelingRule.parse_obj(get_file(file)) diff --git a/demisto_sdk/commands/pre_commit/pre_commit_command.py b/demisto_sdk/commands/pre_commit/pre_commit_command.py index 67b314bc3b6..a0cc41b1a33 100644 --- a/demisto_sdk/commands/pre_commit/pre_commit_command.py +++ b/demisto_sdk/commands/pre_commit/pre_commit_command.py @@ -749,18 +749,51 @@ def preprocess_files( Returns: Set[Path]: The set of files to run pre-commit on. """ + from demisto_sdk.commands.common.string_to_bool import string_to_bool + + # Auto-enable committed_only for private repos to avoid getting all diverged files + is_private_repo = string_to_bool( + os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False + ) + if ( + is_private_repo + and use_git + and not commited_only + and not staged_only + and not all_files + ): + logger.info( + "DEMISTO_SDK_PRIVATE_REPO_MODE detected - automatically enabling committed_only mode" + ) + logger.info( + "This prevents including all diverged files from master in private repos" + ) + commited_only = True + git_util = GitUtil() staged_files = git_util._get_staged_files() all_git_files = git_util.get_all_files().union(staged_files) contribution_flow = os.getenv("CONTRIB_BRANCH") + if input_files: raw_files = set(input_files) elif staged_only: raw_files = staged_files elif use_git: - raw_files = git_util._get_all_changed_files(prev_version) - if not commited_only: + if commited_only and is_private_repo: + # For committed_only mode, get files from actual commits using get_all_changed_files + # which properly filters by commit status + raw_files = git_util.get_all_changed_files( + prev_ver=prev_version or "", + committed_only=True, + staged_only=False, + include_untracked=False, + ) + else: + # For non-committed_only mode, use the internal method + raw_files = git_util._get_all_changed_files(prev_version) raw_files = raw_files.union(staged_files) + if contribution_flow: """ If this command runs on a build triggered by an external contribution PR, @@ -783,9 +816,11 @@ def preprocess_files( files_to_run: Set[Path] = set() for file in raw_files: if file.is_dir(): - files_to_run.update({path for path in file.rglob("*") if path.is_file()}) + dir_files = {path for path in file.rglob("*") if path.is_file()} + files_to_run.update(dir_files) else: files_to_run.update(add_related_files(file)) + # convert to relative file to content path relative_paths = { file.relative_to(CONTENT_PATH) if file.is_absolute() else file diff --git a/demisto_sdk/commands/pre_commit/pre_commit_setup.py b/demisto_sdk/commands/pre_commit/pre_commit_setup.py index 05429b3ea53..0a586725b23 100644 --- a/demisto_sdk/commands/pre_commit/pre_commit_setup.py +++ b/demisto_sdk/commands/pre_commit/pre_commit_setup.py @@ -1,3 +1,4 @@ +import os from pathlib import Path from typing import Optional @@ -110,6 +111,12 @@ def pre_commit( "--log-file-path", help="Path to save log files onto.", ), + handling_private_repositories: Optional[bool] = typer.Option( + False, + "-hpr", + "--handling-private-repositories", + help="Handling private repos.", + ), ): """ This command enhances the content development experience, by running a variety of checks and linters. @@ -131,6 +138,9 @@ def pre_commit( from demisto_sdk.commands.pre_commit.pre_commit_command import pre_commit_manager + if handling_private_repositories: + os.environ["DEMISTO_SDK_PRIVATE_REPO_MODE"] = "true" + return_code = pre_commit_manager( input_files, staged_only, diff --git a/demisto_sdk/commands/validate/initializer.py b/demisto_sdk/commands/validate/initializer.py index c7a58eda924..8513cd84bd3 100644 --- a/demisto_sdk/commands/validate/initializer.py +++ b/demisto_sdk/commands/validate/initializer.py @@ -224,7 +224,7 @@ def __init__( self.staged = staged self.file_path = file_path self.committed_only = committed_only - self.prev_ver = prev_ver + self.prev_ver = "master" if handling_private_repositories else prev_ver self.execution_mode = execution_mode self.handling_private_repositories = handling_private_repositories @@ -458,53 +458,6 @@ def get_unfiltered_changed_files_from_git(self) -> Tuple[Set, Set, Set]: " This indicates that there are untracked files. Unable to proceed." ) - """ - Handles private repositories' file statuses when enabled. - - When handling_private_repositories is True, the system processes status files from private repositories - to maintain accurate git status information. This is particularly useful in CI/CD pipelines where - private repositories are involved. - - The system looks for the following status files in order: - 1. content_private_files_relative_paths.txt - 2. content_test_conf_files_relative_paths.txt - 3. content_configuration_files_relative_paths.txt - - Each file should contain a JSON object where: - - Keys are file paths - - Values are status strings: "modified", "added", or a dict with "status": "renamed" and "old_path" - - The system will: - 1. Check for each status file's existence - 2. Parse the JSON content - 3. For each file: - - If status is "modified", move from added_files to modified_files - - If status is "added", keep in added_files - - If status is "renamed", move from added_files to renamed_files with old_path - 4. Log any processing errors without failing - 5. Continue with validation even if no status files are found - """ - - if self.handling_private_repositories: - artifacts_folder = os.getenv("ARTIFACTS_FOLDER", "") - logs_dir = ( - Path(artifacts_folder) / "logs" if artifacts_folder else Path("logs") - ) - - status_files = [ - logs_dir / PRIVATE_REPO_STATUS_FILE_PRIVATE, - logs_dir / PRIVATE_REPO_STATUS_FILE_TEST_CONF, - logs_dir / PRIVATE_REPO_STATUS_FILE_CONFIGURATION, - ] - - for status_file in status_files: - _process_status_file( - status_file, modified_files, added_files, renamed_files - ) - - # Log files in a more readable format - _log_file_changes(modified_files, added_files, renamed_files) - return modified_files, added_files, renamed_files def specify_files_by_status( diff --git a/demisto_sdk/scripts/validate_deleted_files.py b/demisto_sdk/scripts/validate_deleted_files.py index 51bc01a37c9..2f4710049a6 100644 --- a/demisto_sdk/scripts/validate_deleted_files.py +++ b/demisto_sdk/scripts/validate_deleted_files.py @@ -52,7 +52,8 @@ def is_file_allowed_to_be_deleted_by_file_type(file_path: Path) -> bool: ) is_silent = check_if_content_item_is_silent(file_content) if file_type := find_type(str(file_path), file_content): - return True if file_type in FileType_ALLOWED_TO_DELETE or is_silent else False + return file_type in FileType_ALLOWED_TO_DELETE or is_silent + return True @@ -68,8 +69,16 @@ def get_forbidden_deleted_files(protected_dirs: Set[str]) -> List[str]: Returns all the file paths which cannot be deleted """ git_util = GitUtil.from_content_path() + + # Get deleted files - don't use committed_only to ensure we catch all deletions + raw_deleted_files = git_util.deleted_files( + prev_ver=DEMISTO_GIT_PRIMARY_BRANCH, + committed_only=False, # Get both staged and committed to catch all deletions + staged_only=False, + ) + deleted_files = handle_private_repo_deleted_files( - git_util.deleted_files(DEMISTO_GIT_PRIMARY_BRANCH), show_deleted_files=False + raw_deleted_files, show_deleted_files=False ) deleted_files_in_protected_dirs = [