Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 80 additions & 15 deletions demisto_sdk/commands/common/git_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
PACKS_FOLDER,
)
from demisto_sdk.commands.common.logger import logger
from demisto_sdk.commands.common.string_to_bool import string_to_bool


class CommitOrBranchNotFoundError(GitError):
Expand Down Expand Up @@ -538,7 +539,7 @@ def deleted_files(
committed = set()

if not staged_only:
# get all committed files identified as added which are changed from prev_ver.
# get all committed files identified as deleted which are changed from prev_ver.
# this can result in extra files identified which were not touched on this branch.
if remote:
committed = {
Expand All @@ -559,9 +560,30 @@ def deleted_files(
}

# identify all files that were touched on this branch regardless of status
# intersect these with all the committed files to identify the committed added files.
all_branch_changed_files = self._get_all_changed_files(prev_ver)
committed = committed.intersection(all_branch_changed_files)
# intersect these with all the committed files to identify the committed deleted files.
# EXCEPT in private repo mode - we want to catch ALL deletions, not just those in changed files
is_private_repo = string_to_bool(
os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False
)

if not is_private_repo:
all_branch_changed_files = self._get_all_changed_files(prev_ver)
committed = committed.intersection(all_branch_changed_files)

else:
# In private repo mode, we still need to filter to only files changed in THIS branch
# Otherwise we get ALL files deleted between master and current branch
all_branch_changed_files = self._get_all_changed_files(prev_ver)

# Intersect with branch changes to get only deletions in THIS branch
committed = committed.intersection(all_branch_changed_files)

# Filter out .gitkeep files and files under AssetsModelingRules
committed = {
f
for f in committed
if f.name != ".gitkeep" and "AssetsModelingRules" not in str(f)
}

if committed_only:
return committed
Expand All @@ -571,12 +593,20 @@ def deleted_files(
# get all untracked deleted files
untracked = self._get_untracked_files("D")

# get all the files that are staged on the branch and identified as added.
# get all the files that are staged on the branch and identified as deleted.
staged = {
Path(os.path.join(item.a_path)) # type: ignore
for item in self.repo.head.commit.diff().iter_change_type("D")
}.union(untracked)

# Also get unstaged deleted files (working directory deletions not yet staged)
# This ensures we catch all deletions when committed_only=False
unstaged_deleted = {
Path(os.path.join(item.a_path)) # type: ignore
for item in self.repo.head.commit.diff(None).iter_change_type("D")
}
staged = staged.union(unstaged_deleted)

if staged_only:
return staged

Expand Down Expand Up @@ -696,11 +726,32 @@ def renamed_files(
return all_renamed_files

def get_all_changed_pack_ids(self, prev_ver: str) -> Set[str]:
    """Return the IDs of all packs that contain a changed or staged file.

    A pack ID is the path component directly below ``PACKS_FOLDER``.

    Args:
        prev_ver: The revision to compare the current branch against.
            Non-string values and the literal strings ``"True"``/``"False"``
            are replaced with ``""`` before use. This argument is not used
            at all when ``DEMISTO_SDK_PRIVATE_REPO_MODE`` is enabled.

    Returns:
        Set[str]: The names of the pack folders touched by the changes.
    """
    # Callers may hand over a boolean (or its string form) instead of a
    # revision. Fall back to "" instead of passing that value on to git.
    # NOTE(review): "" presumably resolves to the default branch - confirm
    # against handle_prev_ver.
    if not isinstance(prev_ver, str) or prev_ver in ("True", "False"):
        prev_ver = ""

    is_private_repo = string_to_bool(
        os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False
    )

    if is_private_repo:
        # In private repos the graph's (possibly old) commit is ignored and
        # only the files committed on the current branch are considered, so
        # packs that merely diverged since that commit are not reported.
        branch_files = self.get_all_changed_files(prev_ver="", committed_only=True)
    else:
        branch_files = self._get_all_changed_files(prev_ver)

    changed_files = branch_files | self._get_staged_files()

    return {
        file.parts[1]
        for file in changed_files
        # A pack path needs at least "<PACKS_FOLDER>/<pack id>"; the length
        # check avoids an IndexError on a bare top-level path.
        if len(file.parts) > 1 and file.parts[0] == PACKS_FOLDER
    }

def _get_untracked_files(self, requested_status: str) -> set:
"""return all untracked files of the given requested status.
Expand Down Expand Up @@ -737,11 +788,12 @@ def _get_staged_files(self) -> Set[Path]:
Returns:
Set[Path]: The staged files to return
"""
return {
staged_files = {
Path(item)
for item in self.repo.git.diff("--cached", "--name-only").split("\n")
if item
}
return staged_files

def _get_all_changed_files(self, prev_ver: Optional[str] = None) -> Set[Path]:
"""
Expand All @@ -753,29 +805,38 @@ def _get_all_changed_files(self, prev_ver: Optional[str] = None) -> Set[Path]:
Returns:
Set[Path]: of Paths to files changed in the current branch.
"""

self.fetch()
remote, branch = self.handle_prev_ver(prev_ver)
current_hash = self.get_current_commit_hash()

# Check if branch is a commit hash (40-char hex string)
sha1_pattern = re.compile(r"\b[0-9a-f]{40}\b", flags=re.IGNORECASE)
is_commit_hash = bool(sha1_pattern.match(branch))

if remote:
return {
changed_files = {
Path(os.path.join(item))
for item in self.repo.git.diff(
"--name-only", f"{remote}/{branch}...{current_hash}"
).split("\n")
if item
}

# if remote does not exist we are checking against the commit sha1
# if remote does not exist we are checking against the commit sha1 or branch
else:
return {
# For non-private repos or commit hashes:
# - Use two-dot diff for commit hashes
# - Use three-dot diff for branches
diff_operator = ".." if is_commit_hash else "..."

changed_files = {
Path(os.path.join(item))
for item in self.repo.git.diff(
"--name-only", f"{branch}...{current_hash}"
"--name-only", f"{branch}{diff_operator}{current_hash}"
).split("\n")
if item
}
return changed_files

def _only_last_commit(
self, prev_ver: str, requested_status: Lit_change_type
Expand Down Expand Up @@ -847,6 +908,10 @@ def find_primary_branch(repo: Repo) -> str:
return ""

def handle_prev_ver(self, prev_ver: Optional[str] = None):
if string_to_bool(
os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False
):
return "", prev_ver or "master"
# check for sha1 in regex
sha1_pattern = re.compile(r"\b[0-9a-f]{40}\b", flags=re.IGNORECASE)
if prev_ver and sha1_pattern.match(prev_ver):
Expand Down
3 changes: 2 additions & 1 deletion demisto_sdk/commands/content_graph/commands/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ def update_content_graph(
content_graph_interface, marketplace, dependencies, output_path
)
return
if use_git and (commit := content_graph_interface.commit and not is_external_repo):
if use_git and content_graph_interface.commit and not is_external_repo:
commit = content_graph_interface.commit
try:
git_util.get_all_changed_pack_ids(commit) # type: ignore[arg-type]
except Exception as e:
Expand Down
60 changes: 46 additions & 14 deletions demisto_sdk/commands/pre_commit/pre_commit_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,6 @@ def run_hook(
Returns:
int: return code - 0 if hook passed, 1 if failed
"""
logger.debug(f"Running hook {hook_id}")

if json_output_path and json_output_path.is_dir():
json_output_path = json_output_path / f"{hook_id}.json"

Expand Down Expand Up @@ -254,13 +252,14 @@ def run(

num_processes = cpu_count()
all_hooks_exit_codes = []
hooks_to_run = PreCommitRunner.original_hook_id_to_generated_hook_ids.items()
logger.debug(f"run {hooks_to_run=}")

for original_hook_id, generated_hooks in hooks_to_run:
for (
original_hook_id,
generated_hooks,
) in PreCommitRunner.original_hook_id_to_generated_hook_ids.items():
if generated_hooks:
logger.debug(f"Running hook {original_hook_id} with {generated_hooks}")
hook_ids = generated_hooks.hook_ids

if (
generated_hooks.parallel
and len(hook_ids) > 1
Expand All @@ -273,6 +272,7 @@ def run(
json_output_path.parent / json_output_path.stem
)
json_output_path.mkdir(exist_ok=True)

with ThreadPool(num_processes) as pool:
current_hooks_exit_codes = pool.map(
partial(
Expand All @@ -297,12 +297,8 @@ def run(

all_hooks_exit_codes.extend(current_hooks_exit_codes)

else:
logger.debug(
f"Skipping hook {original_hook_id} as it does not have any generated-hook-ids"
)

return_code = int(any(all_hooks_exit_codes))

if return_code and show_diff_on_failure:
logger.info(
"Pre-Commit changed the following. If you experience this in CI, please run `demisto-sdk pre-commit`"
Expand All @@ -315,6 +311,7 @@ def run(
logger.info( # noqa: PLE1205
"{}", git_diff.stdout
)

return return_code

@staticmethod
Expand Down Expand Up @@ -749,18 +746,51 @@ def preprocess_files(
Returns:
Set[Path]: The set of files to run pre-commit on.
"""
from demisto_sdk.commands.common.string_to_bool import string_to_bool

# Auto-enable committed_only for private repos to avoid getting all diverged files
is_private_repo = string_to_bool(
os.getenv("DEMISTO_SDK_PRIVATE_REPO_MODE", ""), default_when_empty=False
)
if (
is_private_repo
and use_git
and not commited_only
and not staged_only
and not all_files
):
logger.info(
"<yellow>DEMISTO_SDK_PRIVATE_REPO_MODE detected - automatically enabling committed_only mode</yellow>"
)
logger.info(
"<yellow>This prevents including all diverged files from master in private repos</yellow>"
)
commited_only = True

git_util = GitUtil()
staged_files = git_util._get_staged_files()
all_git_files = git_util.get_all_files().union(staged_files)
contribution_flow = os.getenv("CONTRIB_BRANCH")

if input_files:
raw_files = set(input_files)
elif staged_only:
raw_files = staged_files
elif use_git:
raw_files = git_util._get_all_changed_files(prev_version)
if not commited_only:
if commited_only:
# For committed_only mode, get files from actual commits using get_all_changed_files
# which properly filters by commit status
raw_files = git_util.get_all_changed_files(
prev_ver=prev_version or "",
committed_only=True,
staged_only=False,
include_untracked=False,
)
else:
# For non-committed_only mode, use the internal method
raw_files = git_util._get_all_changed_files(prev_version)
raw_files = raw_files.union(staged_files)

if contribution_flow:
"""
If this command runs on a build triggered by an external contribution PR,
Expand All @@ -783,9 +813,11 @@ def preprocess_files(
files_to_run: Set[Path] = set()
for file in raw_files:
if file.is_dir():
files_to_run.update({path for path in file.rglob("*") if path.is_file()})
dir_files = {path for path in file.rglob("*") if path.is_file()}
files_to_run.update(dir_files)
else:
files_to_run.update(add_related_files(file))

# convert to relative file to content path
relative_paths = {
file.relative_to(CONTENT_PATH) if file.is_absolute() else file
Expand Down
10 changes: 10 additions & 0 deletions demisto_sdk/commands/pre_commit/pre_commit_setup.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from pathlib import Path
from typing import Optional

Expand Down Expand Up @@ -110,6 +111,12 @@ def pre_commit(
"--log-file-path",
help="Path to save log files onto.",
),
handling_private_repositories: Optional[bool] = typer.Option(
False,
"-hpr",
"--handling-private-repositories",
help="Handling private repos.",
),
):
"""
This command enhances the content development experience, by running a variety of checks and linters.
Expand All @@ -131,6 +138,9 @@ def pre_commit(

from demisto_sdk.commands.pre_commit.pre_commit_command import pre_commit_manager

if handling_private_repositories:
os.environ["DEMISTO_SDK_PRIVATE_REPO_MODE"] = "true"

return_code = pre_commit_manager(
input_files,
staged_only,
Expand Down
40 changes: 20 additions & 20 deletions demisto_sdk/commands/validate/initializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def __init__(
self.staged = staged
self.file_path = file_path
self.committed_only = committed_only
self.prev_ver = prev_ver
self.prev_ver = "master" if handling_private_repositories else prev_ver
self.execution_mode = execution_mode
self.handling_private_repositories = handling_private_repositories

Expand Down Expand Up @@ -485,25 +485,25 @@ def get_unfiltered_changed_files_from_git(self) -> Tuple[Set, Set, Set]:
5. Continue with validation even if no status files are found
"""

if self.handling_private_repositories:
artifacts_folder = os.getenv("ARTIFACTS_FOLDER", "")
logs_dir = (
Path(artifacts_folder) / "logs" if artifacts_folder else Path("logs")
)

status_files = [
logs_dir / PRIVATE_REPO_STATUS_FILE_PRIVATE,
logs_dir / PRIVATE_REPO_STATUS_FILE_TEST_CONF,
logs_dir / PRIVATE_REPO_STATUS_FILE_CONFIGURATION,
]

for status_file in status_files:
_process_status_file(
status_file, modified_files, added_files, renamed_files
)

# Log files in a more readable format
_log_file_changes(modified_files, added_files, renamed_files)
# if self.handling_private_repositories:
# artifacts_folder = os.getenv("ARTIFACTS_FOLDER", "")
# logs_dir = (
# Path(artifacts_folder) / "logs" if artifacts_folder else Path("logs")
# )
#
# status_files = [
# logs_dir / PRIVATE_REPO_STATUS_FILE_PRIVATE,
# logs_dir / PRIVATE_REPO_STATUS_FILE_TEST_CONF,
# logs_dir / PRIVATE_REPO_STATUS_FILE_CONFIGURATION,
# ]
#
# for status_file in status_files:
# _process_status_file(
# status_file, modified_files, added_files, renamed_files
# )
#
# # Log files in a more readable format
# _log_file_changes(modified_files, added_files, renamed_files)

return modified_files, added_files, renamed_files

Expand Down
Loading
Loading