diff --git a/.changelog/5185.yml b/.changelog/5185.yml
new file mode 100644
index 00000000000..3edf926e10c
--- /dev/null
+++ b/.changelog/5185.yml
@@ -0,0 +1,4 @@
+changes:
+- description: Improved handling private repositories.
+ type: feature
+pr_number: 5185
diff --git a/demisto_sdk/commands/common/git_util.py b/demisto_sdk/commands/common/git_util.py
index f5ce611d10b..0fcfafeee0f 100644
--- a/demisto_sdk/commands/common/git_util.py
+++ b/demisto_sdk/commands/common/git_util.py
@@ -23,6 +23,7 @@
PACKS_FOLDER,
)
from demisto_sdk.commands.common.logger import logger
+from demisto_sdk.commands.common.string_to_bool import string_to_bool
class CommitOrBranchNotFoundError(GitError):
@@ -538,7 +539,7 @@ def deleted_files(
committed = set()
if not staged_only:
- # get all committed files identified as added which are changed from prev_ver.
+ # get all committed files identified as deleted which are changed from prev_ver.
# this can result in extra files identified which were not touched on this branch.
if remote:
committed = {
@@ -559,9 +560,22 @@ def deleted_files(
}
# identify all files that were touched on this branch regardless of status
- # intersect these with all the committed files to identify the committed added files.
- all_branch_changed_files = self._get_all_changed_files(prev_ver)
- committed = committed.intersection(all_branch_changed_files)
+ # intersect these with all the committed files to identify the committed deleted files.
+ # EXCEPT in private repo mode - we want to catch ALL deletions, not just those in changed files
+ is_private_repo = string_to_bool(
+ os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False
+ )
+
+ if not is_private_repo:
+ all_branch_changed_files = self._get_all_changed_files(prev_ver)
+ committed = committed.intersection(all_branch_changed_files)
+ else:
+ # Filter out .gitkeep files and files under AssetsModelingRules
+ committed = {
+ f
+ for f in committed
+ if f.name != ".gitkeep" and "AssetsModelingRules" not in str(f)
+ }
if committed_only:
return committed
@@ -571,7 +585,7 @@ def deleted_files(
# get all untracked deleted files
untracked = self._get_untracked_files("D")
- # get all the files that are staged on the branch and identified as added.
+ # get all the files that are staged on the branch and identified as deleted.
staged = {
Path(os.path.join(item.a_path)) # type: ignore
for item in self.repo.head.commit.diff().iter_change_type("D")
@@ -696,11 +710,32 @@ def renamed_files(
return all_renamed_files
def get_all_changed_pack_ids(self, prev_ver: str) -> Set[str]:
- return {
- file.parts[1]
- for file in self._get_all_changed_files(prev_ver) | self._get_staged_files()
- if file.parts[0] == PACKS_FOLDER
+ # Handle case where prev_ver might be a boolean or invalid value
+ # If prev_ver is True/False or not a string, use empty string (will use default branch)
+ if not isinstance(prev_ver, str) or prev_ver in ("True", "False"):
+ prev_ver = ""
+
+ # In private repos, ignore the graph's old commit and use actual branch changes
+ # This prevents updating all packs that diverged since the old graph commit
+ is_private_repo = string_to_bool(
+ os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False
+ )
+
+ if is_private_repo:
+ # Get only files changed in the current branch, not all diverged files
+ changed_files = (
+ self.get_all_changed_files(prev_ver="", committed_only=True)
+ | self._get_staged_files()
+ )
+ else:
+ changed_files = (
+ self._get_all_changed_files(prev_ver) | self._get_staged_files()
+ )
+
+ pack_ids = {
+ file.parts[1] for file in changed_files if file.parts[0] == PACKS_FOLDER
}
+ return pack_ids
def _get_untracked_files(self, requested_status: str) -> set:
"""return all untracked files of the given requested status.
@@ -737,11 +772,12 @@ def _get_staged_files(self) -> Set[Path]:
Returns:
Set[Path]: The staged files to return
"""
- return {
+ staged_files = {
Path(item)
for item in self.repo.git.diff("--cached", "--name-only").split("\n")
if item
}
+ return staged_files
def _get_all_changed_files(self, prev_ver: Optional[str] = None) -> Set[Path]:
"""
@@ -753,13 +789,12 @@ def _get_all_changed_files(self, prev_ver: Optional[str] = None) -> Set[Path]:
Returns:
Set[Path]: of Paths to files changed in the current branch.
"""
-
self.fetch()
remote, branch = self.handle_prev_ver(prev_ver)
current_hash = self.get_current_commit_hash()
if remote:
- return {
+ changed_files = {
Path(os.path.join(item))
for item in self.repo.git.diff(
"--name-only", f"{remote}/{branch}...{current_hash}"
@@ -767,15 +802,35 @@ def _get_all_changed_files(self, prev_ver: Optional[str] = None) -> Set[Path]:
if item
}
- # if remote does not exist we are checking against the commit sha1
+ # if remote does not exist we are checking against the commit sha1 or branch
else:
- return {
+ is_private_repo = string_to_bool(
+ os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False
+ )
+
+ # In private repos, always use merge-base to find common ancestor
+ # This prevents including all diverged commits
+ if is_private_repo:
+ try:
+ merge_base = self.repo.git.merge_base(branch, current_hash).strip()
+ compare_from = merge_base
+ diff_operator = ".."
+ except Exception:
+ compare_from = branch
+ diff_operator = ".."
+ else:
+ # For non-private repos, use three-dot diff
+ diff_operator = "..."
+ compare_from = branch
+
+ changed_files = {
Path(os.path.join(item))
for item in self.repo.git.diff(
- "--name-only", f"{branch}...{current_hash}"
+ "--name-only", f"{compare_from}{diff_operator}{current_hash}"
).split("\n")
if item
}
+ return changed_files
def _only_last_commit(
self, prev_ver: str, requested_status: Lit_change_type
@@ -847,6 +902,10 @@ def find_primary_branch(repo: Repo) -> str:
return ""
def handle_prev_ver(self, prev_ver: Optional[str] = None):
+ if string_to_bool(
+ os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False
+ ):
+ return "", prev_ver or "master"
# check for sha1 in regex
sha1_pattern = re.compile(r"\b[0-9a-f]{40}\b", flags=re.IGNORECASE)
if prev_ver and sha1_pattern.match(prev_ver):
diff --git a/demisto_sdk/commands/content_graph/parsers/modeling_rule.py b/demisto_sdk/commands/content_graph/parsers/modeling_rule.py
index 0a1c24fd82c..f76dab80ebe 100644
--- a/demisto_sdk/commands/content_graph/parsers/modeling_rule.py
+++ b/demisto_sdk/commands/content_graph/parsers/modeling_rule.py
@@ -70,6 +70,9 @@ def validate_structure(self) -> List[StructureError]:
directory_pydantic_error = []
directory = self.path if self.path.is_dir() else self.path.parent
for file in directory.iterdir():
+ # Skip testdata.json files as they have a different structure
+ if file.name.endswith("testdata.json"):
+ continue
try:
if file.suffix == ".yml":
StrictModelingRule.parse_obj(get_file(file))
diff --git a/demisto_sdk/commands/pre_commit/pre_commit_command.py b/demisto_sdk/commands/pre_commit/pre_commit_command.py
index 67b314bc3b6..a0cc41b1a33 100644
--- a/demisto_sdk/commands/pre_commit/pre_commit_command.py
+++ b/demisto_sdk/commands/pre_commit/pre_commit_command.py
@@ -749,18 +749,51 @@ def preprocess_files(
Returns:
Set[Path]: The set of files to run pre-commit on.
"""
+ from demisto_sdk.commands.common.string_to_bool import string_to_bool
+
+ # Auto-enable committed_only for private repos to avoid getting all diverged files
+ is_private_repo = string_to_bool(
+ os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False
+ )
+ if (
+ is_private_repo
+ and use_git
+ and not commited_only
+ and not staged_only
+ and not all_files
+ ):
+ logger.info(
+ "DEMISTO_SDK_PRIVATE_REPO_MODE detected - automatically enabling committed_only mode"
+ )
+ logger.info(
+ "This prevents including all diverged files from master in private repos"
+ )
+ commited_only = True
+
git_util = GitUtil()
staged_files = git_util._get_staged_files()
all_git_files = git_util.get_all_files().union(staged_files)
contribution_flow = os.getenv("CONTRIB_BRANCH")
+
if input_files:
raw_files = set(input_files)
elif staged_only:
raw_files = staged_files
elif use_git:
- raw_files = git_util._get_all_changed_files(prev_version)
- if not commited_only:
+ if commited_only and is_private_repo:
+ # For committed_only mode, get files from actual commits using get_all_changed_files
+ # which properly filters by commit status
+ raw_files = git_util.get_all_changed_files(
+ prev_ver=prev_version or "",
+ committed_only=True,
+ staged_only=False,
+ include_untracked=False,
+ )
+ else:
+ # For non-committed_only mode, use the internal method
+ raw_files = git_util._get_all_changed_files(prev_version)
raw_files = raw_files.union(staged_files)
+
if contribution_flow:
"""
If this command runs on a build triggered by an external contribution PR,
@@ -783,9 +816,11 @@ def preprocess_files(
files_to_run: Set[Path] = set()
for file in raw_files:
if file.is_dir():
- files_to_run.update({path for path in file.rglob("*") if path.is_file()})
+ dir_files = {path for path in file.rglob("*") if path.is_file()}
+ files_to_run.update(dir_files)
else:
files_to_run.update(add_related_files(file))
+
# convert to relative file to content path
relative_paths = {
file.relative_to(CONTENT_PATH) if file.is_absolute() else file
diff --git a/demisto_sdk/commands/pre_commit/pre_commit_setup.py b/demisto_sdk/commands/pre_commit/pre_commit_setup.py
index 05429b3ea53..0a586725b23 100644
--- a/demisto_sdk/commands/pre_commit/pre_commit_setup.py
+++ b/demisto_sdk/commands/pre_commit/pre_commit_setup.py
@@ -1,3 +1,4 @@
+import os
from pathlib import Path
from typing import Optional
@@ -110,6 +111,12 @@ def pre_commit(
"--log-file-path",
help="Path to save log files onto.",
),
+ handling_private_repositories: Optional[bool] = typer.Option(
+ False,
+ "-hpr",
+ "--handling-private-repositories",
+ help="Handling private repos.",
+ ),
):
"""
This command enhances the content development experience, by running a variety of checks and linters.
@@ -131,6 +138,9 @@ def pre_commit(
from demisto_sdk.commands.pre_commit.pre_commit_command import pre_commit_manager
+ if handling_private_repositories:
+ os.environ["DEMISTO_SDK_PRIVATE_REPO_MODE"] = "true"
+
return_code = pre_commit_manager(
input_files,
staged_only,
diff --git a/demisto_sdk/commands/validate/initializer.py b/demisto_sdk/commands/validate/initializer.py
index c7a58eda924..8513cd84bd3 100644
--- a/demisto_sdk/commands/validate/initializer.py
+++ b/demisto_sdk/commands/validate/initializer.py
@@ -224,7 +224,7 @@ def __init__(
self.staged = staged
self.file_path = file_path
self.committed_only = committed_only
- self.prev_ver = prev_ver
+ self.prev_ver = "master" if handling_private_repositories else prev_ver
self.execution_mode = execution_mode
self.handling_private_repositories = handling_private_repositories
@@ -458,53 +458,6 @@ def get_unfiltered_changed_files_from_git(self) -> Tuple[Set, Set, Set]:
" This indicates that there are untracked files. Unable to proceed."
)
- """
- Handles private repositories' file statuses when enabled.
-
- When handling_private_repositories is True, the system processes status files from private repositories
- to maintain accurate git status information. This is particularly useful in CI/CD pipelines where
- private repositories are involved.
-
- The system looks for the following status files in order:
- 1. content_private_files_relative_paths.txt
- 2. content_test_conf_files_relative_paths.txt
- 3. content_configuration_files_relative_paths.txt
-
- Each file should contain a JSON object where:
- - Keys are file paths
- - Values are status strings: "modified", "added", or a dict with "status": "renamed" and "old_path"
-
- The system will:
- 1. Check for each status file's existence
- 2. Parse the JSON content
- 3. For each file:
- - If status is "modified", move from added_files to modified_files
- - If status is "added", keep in added_files
- - If status is "renamed", move from added_files to renamed_files with old_path
- 4. Log any processing errors without failing
- 5. Continue with validation even if no status files are found
- """
-
- if self.handling_private_repositories:
- artifacts_folder = os.getenv("ARTIFACTS_FOLDER", "")
- logs_dir = (
- Path(artifacts_folder) / "logs" if artifacts_folder else Path("logs")
- )
-
- status_files = [
- logs_dir / PRIVATE_REPO_STATUS_FILE_PRIVATE,
- logs_dir / PRIVATE_REPO_STATUS_FILE_TEST_CONF,
- logs_dir / PRIVATE_REPO_STATUS_FILE_CONFIGURATION,
- ]
-
- for status_file in status_files:
- _process_status_file(
- status_file, modified_files, added_files, renamed_files
- )
-
- # Log files in a more readable format
- _log_file_changes(modified_files, added_files, renamed_files)
-
return modified_files, added_files, renamed_files
def specify_files_by_status(
diff --git a/demisto_sdk/scripts/validate_deleted_files.py b/demisto_sdk/scripts/validate_deleted_files.py
index 51bc01a37c9..2f4710049a6 100644
--- a/demisto_sdk/scripts/validate_deleted_files.py
+++ b/demisto_sdk/scripts/validate_deleted_files.py
@@ -52,7 +52,8 @@ def is_file_allowed_to_be_deleted_by_file_type(file_path: Path) -> bool:
)
is_silent = check_if_content_item_is_silent(file_content)
if file_type := find_type(str(file_path), file_content):
- return True if file_type in FileType_ALLOWED_TO_DELETE or is_silent else False
+ return file_type in FileType_ALLOWED_TO_DELETE or is_silent
+
return True
@@ -68,8 +69,16 @@ def get_forbidden_deleted_files(protected_dirs: Set[str]) -> List[str]:
Returns all the file paths which cannot be deleted
"""
git_util = GitUtil.from_content_path()
+
+ # Get deleted files - don't use committed_only to ensure we catch all deletions
+ raw_deleted_files = git_util.deleted_files(
+ prev_ver=DEMISTO_GIT_PRIMARY_BRANCH,
+ committed_only=False, # Get both staged and committed to catch all deletions
+ staged_only=False,
+ )
+
deleted_files = handle_private_repo_deleted_files(
- git_util.deleted_files(DEMISTO_GIT_PRIMARY_BRANCH), show_deleted_files=False
+ raw_deleted_files, show_deleted_files=False
)
deleted_files_in_protected_dirs = [