From 0e744013e3d5d1254479ddec61c233e62df2a455 Mon Sep 17 00:00:00 2001 From: Wangmo Tenzing Date: Mon, 3 Feb 2025 14:23:31 -0800 Subject: [PATCH 1/6] change native_lib_database to dictionary of dicts --- surfactant/infoextractors/native_lib_file.py | 32 ++++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/surfactant/infoextractors/native_lib_file.py b/surfactant/infoextractors/native_lib_file.py index 30a374ef..7c3478da 100644 --- a/surfactant/infoextractors/native_lib_file.py +++ b/surfactant/infoextractors/native_lib_file.py @@ -1,6 +1,7 @@ import json import os import re +from pathlib import Path from typing import Any, Dict, List, Optional, Union import requests @@ -16,16 +17,25 @@ def __init__(self) -> None: self.native_lib_database: Optional[Dict[str, Any]] = None def load_db(self) -> None: - native_lib_file = ConfigManager().get_data_dir_path() / "native_lib_patterns" / "emba.json" + native_lib_folder = ConfigManager().get_data_dir_path() / "native_lib_patterns" + self.native_lib_database = {} # Is a dict of dicts, each inner dict is one json file + + if native_lib_folder.exists(): + # See how many .json files there are in the folder + for file in native_lib_folder.glob("*.json"): + try: + with open(file, "r") as regex: + patterns = json.load(regex) + self.native_lib_database.append(patterns) + except FileNotFoundError: + logger.warning( + "" + ) + else: + print("No JSON files found. Run `surfactant plugin update-db native_lib_patterns` to fetch the pattern database or place private JSON patterns at this location: __.") + + print("printing out native_lib_database: ", self.native_lib_database) - try: - with open(native_lib_file, "r") as regex: - self.native_lib_database = json.load(regex) - except FileNotFoundError: - logger.warning( - "Native library pattern could not be loaded. Run `surfactant plugin update-db native_lib_patterns` to fetch the pattern database." - ) - self.native_lib_database = None def get_database(self) -> Optional[Dict[str, Any]]: return self.native_lib_database @@ -210,3 +220,7 @@ def init_hook(command_name: Optional[str] = None) -> None: logger.info("Initializing native_lib_file...") native_lib_manager.load_db() logger.info("Initializing native_lib_file complete.") + + # Create native_lib_patterns folder for storing JSON DB's + path = ConfigManager().get_data_dir_path() / "native_lib_patterns" + path.mkdir(parents=True, exist_ok=True) \ No newline at end of file From 55adaadf2fb9b884c804456171526d1cbdc1219d Mon Sep 17 00:00:00 2001 From: Wangmo Tenzing Date: Tue, 4 Feb 2025 12:21:52 -0800 Subject: [PATCH 2/6] changing nested dictionary --- surfactant/infoextractors/native_lib_file.py | 31 ++++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/surfactant/infoextractors/native_lib_file.py b/surfactant/infoextractors/native_lib_file.py index 7c3478da..d09f1f94 100644 --- a/surfactant/infoextractors/native_lib_file.py +++ b/surfactant/infoextractors/native_lib_file.py @@ -26,7 +26,7 @@ def load_db(self) -> None: try: with open(file, "r") as regex: patterns = json.load(regex) - self.native_lib_database.append(patterns) + self.native_lib_database[file.stem] = patterns except FileNotFoundError: logger.warning( "" @@ -34,7 +34,7 @@ def load_db(self) -> None: else: print("No JSON files found. Run `surfactant plugin update-db native_lib_patterns` to fetch the pattern database or place private JSON patterns at this location: __.") - print("printing out native_lib_database: ", self.native_lib_database) + #print(self.native_lib_database) def get_database(self) -> Optional[Dict[str, Any]]: @@ -61,6 +61,10 @@ def extract_native_lib_info(filename: str) -> Optional[Dict[str, Any]]: native_lib_info: Dict[str, Any] = {"nativeLibraries": []} native_lib_database = native_lib_manager.get_database() + #print(native_lib_database) + print("this is the length") + print(len(native_lib_database)) + if native_lib_database is None: return None @@ -104,17 +108,18 @@ def match_by_attribute( attribute: str, content: Union[str, bytes], patterns_database: Dict[str, Any] ) -> List[Dict[str, Any]]: libs: List[Dict[str, str]] = [] - for lib_name, lib_info in patterns_database.items(): - if attribute in lib_info: - for pattern in lib_info[attribute]: - if attribute == "filename": - if pattern.lower() == content.lower(): - libs.append({"isLibrary": lib_name}) - - elif attribute == "filecontent": - matches = re.search(pattern.encode("utf-8"), content) - if matches: - libs.append({"containsLibrary": lib_name}) + for database_name, database_info in patterns_database.items(): + for lib_name, lib_info in database_info.items(): + if attribute in lib_info: + for pattern in lib_info[attribute]: + if attribute == "filename": + if pattern.lower() == content.lower(): + libs.append({"isLibrary": lib_name}) + + elif attribute == "filecontent": + matches = re.search(pattern.encode("utf-8"), content) + if matches: + libs.append({"containsLibrary": lib_name}) return libs From 2bbd89ae417488da54ab79654b0e93dc8586236d Mon Sep 17 00:00:00 2001 From: Wangmo Tenzing Date: Tue, 4 Feb 2025 18:47:30 -0800 Subject: [PATCH 3/6] update load_db() --- surfactant/infoextractors/native_lib_file.py | 35 +++++++++----------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/surfactant/infoextractors/native_lib_file.py b/surfactant/infoextractors/native_lib_file.py index d09f1f94..057999f4 100644 --- a/surfactant/infoextractors/native_lib_file.py +++ b/surfactant/infoextractors/native_lib_file.py @@ -20,22 +20,23 @@ def load_db(self) -> None: native_lib_folder = ConfigManager().get_data_dir_path() / "native_lib_patterns" self.native_lib_database = {} # Is a dict of dicts, each inner dict is one json file - if native_lib_folder.exists(): + # Check if there are files in the folder. Ignores hidden files + if not any(f for f in native_lib_folder.iterdir() if not f.name.startswith(".")): + logger.warning( + "No JSON files found. Run `surfactant plugin update-db native_lib_patterns` to fetch the pattern database or place private JSON DB at location: __." + ) + self.native_lib_database = None + + else: # See how many .json files there are in the folder for file in native_lib_folder.glob("*.json"): - try: - with open(file, "r") as regex: - patterns = json.load(regex) - self.native_lib_database[file.stem] = patterns - except FileNotFoundError: - logger.warning( - "" - ) - else: - print("No JSON files found. Run `surfactant plugin update-db native_lib_patterns` to fetch the pattern database or place private JSON patterns at this location: __.") - - #print(self.native_lib_database) - + try: + with open(file, "r") as regex: + patterns = json.load(regex) + self.native_lib_database[file.stem] = patterns + except json.JSONDecodeError: + logger.error(f"Failed to decode JSON in file: {file}" + ) def get_database(self) -> Optional[Dict[str, Any]]: return self.native_lib_database @@ -60,10 +61,6 @@ def extract_file_info( def extract_native_lib_info(filename: str) -> Optional[Dict[str, Any]]: native_lib_info: Dict[str, Any] = {"nativeLibraries": []} native_lib_database = native_lib_manager.get_database() - - #print(native_lib_database) - print("this is the length") - print(len(native_lib_database)) if native_lib_database is None: return None @@ -108,7 +105,7 @@ def match_by_attribute( attribute: str, content: Union[str, bytes], patterns_database: Dict[str, Any] ) -> List[Dict[str, Any]]: libs: List[Dict[str, str]] = [] - for database_name, database_info in patterns_database.items(): + for _, database_info in patterns_database.items(): for lib_name, lib_info in database_info.items(): if attribute in lib_info: for pattern in lib_info[attribute]: From f56d659da5d927c81bb83bb52e8f44a9b4aa93ea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Feb 2025 03:01:45 +0000 Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- surfactant/infoextractors/native_lib_file.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/surfactant/infoextractors/native_lib_file.py b/surfactant/infoextractors/native_lib_file.py index 057999f4..491075cc 100644 --- a/surfactant/infoextractors/native_lib_file.py +++ b/surfactant/infoextractors/native_lib_file.py @@ -1,7 +1,6 @@ import json import os import re -from pathlib import Path from typing import Any, Dict, List, Optional, Union import requests @@ -18,13 +17,13 @@ def __init__(self) -> None: def load_db(self) -> None: native_lib_folder = ConfigManager().get_data_dir_path() / "native_lib_patterns" - self.native_lib_database = {} # Is a dict of dicts, each inner dict is one json file + self.native_lib_database = {} # Is a dict of dicts, each inner dict is one json file # Check if there are files in the folder. Ignores hidden files if not any(f for f in native_lib_folder.iterdir() if not f.name.startswith(".")): logger.warning( "No JSON files found. Run `surfactant plugin update-db native_lib_patterns` to fetch the pattern database or place private JSON DB at location: __." - ) + ) self.native_lib_database = None else: @@ -35,8 +34,7 @@ def load_db(self) -> None: patterns = json.load(regex) self.native_lib_database[file.stem] = patterns except json.JSONDecodeError: - logger.error(f"Failed to decode JSON in file: {file}" - ) + logger.error(f"Failed to decode JSON in file: {file}") def get_database(self) -> Optional[Dict[str, Any]]: return self.native_lib_database @@ -61,7 +59,7 @@ def extract_file_info( def extract_native_lib_info(filename: str) -> Optional[Dict[str, Any]]: native_lib_info: Dict[str, Any] = {"nativeLibraries": []} native_lib_database = native_lib_manager.get_database() - + if native_lib_database is None: return None @@ -225,4 +223,4 @@ def init_hook(command_name: Optional[str] = None) -> None: # Create native_lib_patterns folder for storing JSON DB's path = ConfigManager().get_data_dir_path() / "native_lib_patterns" - path.mkdir(parents=True, exist_ok=True) \ No newline at end of file + path.mkdir(parents=True, exist_ok=True) From bf294f102227c1204ab41587d4a8f3e29967ea47 Mon Sep 17 00:00:00 2001 From: Wangmo Tenzing Date: Wed, 5 Feb 2025 12:00:50 -0800 Subject: [PATCH 5/6] checking pytest --- surfactant/infoextractors/native_lib_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfactant/infoextractors/native_lib_file.py b/surfactant/infoextractors/native_lib_file.py index 491075cc..8491ad07 100644 --- a/surfactant/infoextractors/native_lib_file.py +++ b/surfactant/infoextractors/native_lib_file.py @@ -221,6 +221,6 @@ def init_hook(command_name: Optional[str] = None) -> None: native_lib_manager.load_db() logger.info("Initializing native_lib_file complete.") - # Create native_lib_patterns folder for storing JSON DB's + # Create native_lib_patterns folder for storing JSON DBs path = ConfigManager().get_data_dir_path() / "native_lib_patterns" path.mkdir(parents=True, exist_ok=True) From ac856d802382bac4e146e0ae3fce3d098c2daff4 Mon Sep 17 00:00:00 2001 From: Wangmo Tenzing Date: Wed, 5 Feb 2025 14:37:04 -0800 Subject: [PATCH 6/6] make sure pytest passes --- surfactant/infoextractors/native_lib_file.py | 37 ++++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/surfactant/infoextractors/native_lib_file.py b/surfactant/infoextractors/native_lib_file.py index 8491ad07..a12642ac 100644 --- a/surfactant/infoextractors/native_lib_file.py +++ b/surfactant/infoextractors/native_lib_file.py @@ -16,25 +16,32 @@ def __init__(self) -> None: self.native_lib_database: Optional[Dict[str, Any]] = None def load_db(self) -> None: - native_lib_folder = ConfigManager().get_data_dir_path() / "native_lib_patterns" - self.native_lib_database = {} # Is a dict of dicts, each inner dict is one json file + try: + native_lib_folder = ConfigManager().get_data_dir_path() / "native_lib_patterns" + self.native_lib_database = {} # Is a dict of dicts, each inner dict is one json file - # Check if there are files in the folder. Ignores hidden files - if not any(f for f in native_lib_folder.iterdir() if not f.name.startswith(".")): + # Check if there are files in the folder. Ignores hidden files + if not any(f for f in native_lib_folder.iterdir() if not f.name.startswith(".")): + logger.warning( + "No JSON files found. Run `surfactant plugin update-db native_lib_patterns` to fetch the pattern database or place private JSON DB at location: __." + ) + self.native_lib_database = None + + else: + # See how many .json files there are in the folder + for file in native_lib_folder.glob("*.json"): + try: + with open(file, "r") as regex: + patterns = json.load(regex) + self.native_lib_database[file.stem] = patterns + except json.JSONDecodeError: + logger.error(f"Failed to decode JSON in file: {file}") + except FileNotFoundError: logger.warning( - "No JSON files found. Run `surfactant plugin update-db native_lib_patterns` to fetch the pattern database or place private JSON DB at location: __." + "Native library patterns folder missing. Run `surfactant plugin update-db native_lib_patterns` to fetch the pattern database or place private JSON DB at location: __." ) self.native_lib_database = None - - else: - # See how many .json files there are in the folder - for file in native_lib_folder.glob("*.json"): - try: - with open(file, "r") as regex: - patterns = json.load(regex) - self.native_lib_database[file.stem] = patterns - except json.JSONDecodeError: - logger.error(f"Failed to decode JSON in file: {file}") + def get_database(self) -> Optional[Dict[str, Any]]: return self.native_lib_database