From 5b38e94e4ff967948d6a8ef063f3f72eec0388e8 Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Sat, 4 Oct 2025 01:01:32 +0530 Subject: [PATCH 1/5] A feat: Implement site name aliases --- .actor/dataset_schema.json | 2 +- sherlock_project/resources/data.json | 1 + sherlock_project/resources/data.schema.json | 289 ++++++++++---------- sherlock_project/sherlock.py | 30 +- 4 files changed, 169 insertions(+), 153 deletions(-) diff --git a/.actor/dataset_schema.json b/.actor/dataset_schema.json index 9edce2f87..eab9c0662 100644 --- a/.actor/dataset_schema.json +++ b/.actor/dataset_schema.json @@ -29,7 +29,7 @@ "fields": [ "username", "links" - ], + ] }, "display": { "component": "table", diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 49750d3e9..5fae05724 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2263,6 +2263,7 @@ "
User ", "429 Too Many Requests" ], + "aliases": ["X"], "errorType": "message", "regexCheck": "^[a-zA-Z0-9_]{1,15}$", "url": "https://x.com/{}", diff --git a/sherlock_project/resources/data.schema.json b/sherlock_project/resources/data.schema.json index c717cb256..44fea83ab 100644 --- a/sherlock_project/resources/data.schema.json +++ b/sherlock_project/resources/data.schema.json @@ -1,149 +1,154 @@ { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "Sherlock Target Manifest", - "description": "Social media targets to probe for the existence of known usernames", - "type": "object", - "properties": { - "$schema": { "type": "string" } - }, - "patternProperties": { - "^(?!\\$).*?$": { - "type": "object", - "description": "Target name and associated information (key should be human readable name)", - "required": ["url", "urlMain", "errorType", "username_claimed"], - "properties": { - "url": { "type": "string" }, - "urlMain": { "type": "string" }, - "urlProbe": { "type": "string" }, - "username_claimed": { "type": "string" }, - "regexCheck": { "type": "string" }, - "isNSFW": { "type": "boolean" }, - "headers": { "type": "object" }, - "request_payload": { "type": "object" }, - "__comment__": { - "type": "string", - "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." - }, - "tags": { - "oneOf": [ - { "$ref": "#/$defs/tag" }, - { "type": "array", "items": { "$ref": "#/$defs/tag" } } - ] - }, - "request_method": { - "type": "string", - "enum": ["GET", "POST", "HEAD", "PUT"] - }, - "errorType": { - "oneOf": [ - { - "type": "string", - "enum": ["message", "response_url", "status_code"] - }, - { - "type": "array", - "items": { - "type": "string", - "enum": ["message", "response_url", "status_code"] - } - } - ] - }, - "errorMsg": { - "oneOf": [ - { "type": "string" }, - { "type": "array", "items": { "type": "string" } } - ] - }, - "errorCode": { - "oneOf": [ - { "type": "integer" }, - { "type": "array", "items": { "type": "integer" } } - ] - }, - "errorUrl": { "type": "string" }, - "response_url": { "type": "string" } - }, - "dependencies": { - "errorMsg": { - "oneOf": [ - { "properties": { "errorType": { "const": "message" } } }, - { - "properties": { - "errorType": { - "type": "array", - "contains": { "const": "message" } - } - } - } - ] - }, - "errorUrl": { - "oneOf": [ - { "properties": { "errorType": { "const": "response_url" } } }, - { - "properties": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Sherlock Target Manifest", + "description": "Social media targets to probe for the existence of known usernames", + "type": "object", + "properties": { + "$schema": { "type": "string" } + }, + "patternProperties": { + "^(?!\\$).*?$": { + "type": "object", + "description": "Target name and associated information (key should be human readable name)", + "required": ["url", "urlMain", "errorType", "username_claimed"], + "properties": { + "url": { "type": "string" }, + "urlMain": { "type": "string" }, + "urlProbe": { "type": "string" }, + "username_claimed": { "type": "string" }, + "username_unclaimed": { "type": "string" }, + "regexCheck": { "type": "string" }, + "isNSFW": { "type": "boolean" }, + "headers": { "type": "object" }, + "request_payload": { "type": "object" }, + "__comment__": { + "type": "string", + "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." + }, + "tags": { + "oneOf": [ + { "$ref": "#/$defs/tag" }, + { "type": "array", "items": { "$ref": "#/$defs/tag" } } + ] + }, + "request_method": { + "type": "string", + "enum": ["GET", "POST", "HEAD", "PUT"] + }, "errorType": { - "type": "array", - "contains": { "const": "response_url" } - } - } - } - ] - }, - "errorCode": { - "oneOf": [ - { "properties": { "errorType": { "const": "status_code" } } }, - { - "properties": { - "errorType": { - "type": "array", - "contains": { "const": "status_code" } - } - } - } - ] - } - }, - "allOf": [ - { - "if": { - "anyOf": [ - { "properties": { "errorType": { "const": "message" } } }, - { - "properties": { - "errorType": { + "oneOf": [ + { + "type": "string", + "enum": ["message", "response_url", "status_code"] + }, + { + "type": "array", + "items": { + "type": "string", + "enum": ["message", "response_url", "status_code"] + } + } + ] + }, + "errorMsg": { + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "string" } } + ] + }, + "errorCode": { + "oneOf": [ + { "type": "integer" }, + { "type": "array", "items": { "type": "integer" } } + ] + }, + "errorUrl": { "type": "string" }, + "response_url": { "type": "string" }, + "aliases": { "type": "array", - "contains": { "const": "message" } - } + "items": { "type": "string" } } - } - ] - }, - "then": { "required": ["errorMsg"] } - }, - { - "if": { - "anyOf": [ - { "properties": { "errorType": { "const": "response_url" } } }, - { - "properties": { - "errorType": { - "type": "array", - "contains": { "const": "response_url" } - } + }, + "dependencies": { + "errorMsg": { + "oneOf": [ + { "properties": { "errorType": { "const": "message" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "message" } + } + } + } + ] + }, + "errorUrl": { + "oneOf": [ + { "properties": { "errorType": { "const": "response_url" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "response_url" } + } + } + } + ] + }, + "errorCode": { + "oneOf": [ + { "properties": { "errorType": { "const": "status_code" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "status_code" } + } + } + } + ] + } + }, + "allOf": [ + { + "if": { + "anyOf": [ + { "properties": { "errorType": { "const": "message" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "message" } + } + } + } + ] + }, + "then": { "required": ["errorMsg"] } + }, + { + "if": { + "anyOf": [ + { "properties": { "errorType": { "const": "response_url" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "response_url" } + } + } + } + ] + }, + "then": { "required": ["errorUrl"] } } - } - ] - }, - "then": { "required": ["errorUrl"] } + ], + "additionalProperties": false } - ], - "additionalProperties": false + }, + "additionalProperties": false, + "$defs": { + "tag": { "type": "string", "enum": ["adult", "gaming"] } } - }, - "additionalProperties": false, - "$defs": { - "tag": { "type": "string", "enum": ["adult", "gaming"] } - } -} +} \ No newline at end of file diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 75b3e3d70..de8eddec3 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -783,7 +783,7 @@ def main(): # Eventually, the rest of the code will be updated to use the new object # directly, but this will glue the two pieces together. site_data_all = {site.name: site.information for site in sites} - if args.site_list == []: + if not args.site_list: # Not desired to look at a sub-set of sites site_data = site_data_all else: @@ -791,15 +791,25 @@ def main(): # Make sure that the sites are supported & build up pruned site database. site_data = {} site_missing = [] - for site in args.site_list: - counter = 0 - for existing_site in site_data_all: - if site.lower() == existing_site.lower(): - site_data[existing_site] = site_data_all[existing_site] - counter += 1 - if counter == 0: - # Build up list of sites not supported for future error message. - site_missing.append(f"'{site}'") + + # Create a mapping from all site names and aliases (in lowercase) to their proper names + site_map = {} + for site_name, site_info in site_data_all.items(): + site_map[site_name.lower()] = site_name + if "aliases" in site_info: + for alias in site_info["aliases"]: + site_map[alias.lower()] = site_name + + for site_name_from_user in args.site_list: + # Find the proper site name from the user's input (which could be an alias) + proper_site_name = site_map.get(site_name_from_user.lower()) + + if proper_site_name: + # If a match was found, add the site's data to our list + site_data[proper_site_name] = site_data_all[proper_site_name] + else: + # If no match was found for the name or any alias + site_missing.append(f"'{site_name_from_user}'") if site_missing: print(f"Error: Desired sites not found: {', '.join(site_missing)}.") From 91ba5a46cd04e12b39953cfc973dca7f816ddb01 Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Mon, 6 Oct 2025 23:32:26 +0530 Subject: [PATCH 2/5] feat: Implement working site alias logic Co-authored-by: obiwan04kanobi --- sherlock_project/resources/data.json | 1 - sherlock_project/sherlock.py | 6 +++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 5fae05724..a1fcbcfa2 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2268,7 +2268,6 @@ "regexCheck": "^[a-zA-Z0-9_]{1,15}$", "url": "https://x.com/{}", "urlMain": "https://x.com/", - "urlProbe": "https://nitter.privacydev.net/{}", "username_claimed": "blue" }, "Typeracer": { diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index de8eddec3..0365d4e2d 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -783,7 +783,11 @@ def main(): # Eventually, the rest of the code will be updated to use the new object # directly, but this will glue the two pieces together. site_data_all = {site.name: site.information for site in sites} - if not args.site_list: + # Create original dictionary from SitesInformation() object. + # Eventually, the rest of the code will be updated to use the new object + # directly, but this will glue the two pieces together. + site_data_all = {site.name: site.information for site in sites} + if args.site_list == []: # Not desired to look at a sub-set of sites site_data = site_data_all else: From cc298d1cbf517b255c341f2123c9cb61f3510d50 Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Tue, 7 Oct 2025 02:40:50 +0530 Subject: [PATCH 3/5] style: Revert indentation to 2 spaces --- sherlock_project/resources/data.schema.json | 294 ++++++++++---------- 1 file changed, 147 insertions(+), 147 deletions(-) diff --git a/sherlock_project/resources/data.schema.json b/sherlock_project/resources/data.schema.json index 44fea83ab..c63efae4e 100644 --- a/sherlock_project/resources/data.schema.json +++ b/sherlock_project/resources/data.schema.json @@ -1,154 +1,154 @@ { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "Sherlock Target Manifest", - "description": "Social media targets to probe for the existence of known usernames", - "type": "object", - "properties": { - "$schema": { "type": "string" } - }, - "patternProperties": { - "^(?!\\$).*?$": { - "type": "object", - "description": "Target name and associated information (key should be human readable name)", - "required": ["url", "urlMain", "errorType", "username_claimed"], - "properties": { - "url": { "type": "string" }, - "urlMain": { "type": "string" }, - "urlProbe": { "type": "string" }, - "username_claimed": { "type": "string" }, - "username_unclaimed": { "type": "string" }, - "regexCheck": { "type": "string" }, - "isNSFW": { "type": "boolean" }, - "headers": { "type": "object" }, - "request_payload": { "type": "object" }, - "__comment__": { - "type": "string", - "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." - }, - "tags": { - "oneOf": [ - { "$ref": "#/$defs/tag" }, - { "type": "array", "items": { "$ref": "#/$defs/tag" } } - ] - }, - "request_method": { - "type": "string", - "enum": ["GET", "POST", "HEAD", "PUT"] - }, + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Sherlock Target Manifest", + "description": "Social media targets to probe for the existence of known usernames", + "type": "object", + "properties": { + "$schema": { "type": "string" } + }, + "patternProperties": { + "^(?!\\$).*?$": { + "type": "object", + "description": "Target name and associated information (key should be human readable name)", + "required": ["url", "urlMain", "errorType", "username_claimed"], + "properties": { + "url": { "type": "string" }, + "urlMain": { "type": "string" }, + "urlProbe": { "type": "string" }, + "username_claimed": { "type": "string" }, + "username_unclaimed": { "type": "string" }, + "regexCheck": { "type": "string" }, + "isNSFW": { "type": "boolean" }, + "headers": { "type": "object" }, + "request_payload": { "type": "object" }, + "__comment__": { + "type": "string", + "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." + }, + "tags": { + "oneOf": [ + { "$ref": "#/$defs/tag" }, + { "type": "array", "items": { "$ref": "#/$defs/tag" } } + ] + }, + "request_method": { + "type": "string", + "enum": ["GET", "POST", "HEAD", "PUT"] + }, + "errorType": { + "oneOf": [ + { + "type": "string", + "enum": ["message", "response_url", "status_code"] + }, + { + "type": "array", + "items": { + "type": "string", + "enum": ["message", "response_url", "status_code"] + } + } + ] + }, + "errorMsg": { + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "string" } } + ] + }, + "errorCode": { + "oneOf": [ + { "type": "integer" }, + { "type": "array", "items": { "type": "integer" } } + ] + }, + "errorUrl": { "type": "string" }, + "response_url": { "type": "string" }, + "aliases": { + "type": "array", + "items": { "type": "string" } + } + }, + "dependencies": { + "errorMsg": { + "oneOf": [ + { "properties": { "errorType": { "const": "message" } } }, + { + "properties": { "errorType": { - "oneOf": [ - { - "type": "string", - "enum": ["message", "response_url", "status_code"] - }, - { - "type": "array", - "items": { - "type": "string", - "enum": ["message", "response_url", "status_code"] - } - } - ] - }, - "errorMsg": { - "oneOf": [ - { "type": "string" }, - { "type": "array", "items": { "type": "string" } } - ] - }, - "errorCode": { - "oneOf": [ - { "type": "integer" }, - { "type": "array", "items": { "type": "integer" } } - ] - }, - "errorUrl": { "type": "string" }, - "response_url": { "type": "string" }, - "aliases": { - "type": "array", - "items": { "type": "string" } + "type": "array", + "contains": { "const": "message" } } - }, - "dependencies": { - "errorMsg": { - "oneOf": [ - { "properties": { "errorType": { "const": "message" } } }, - { - "properties": { - "errorType": { - "type": "array", - "contains": { "const": "message" } - } - } - } - ] - }, - "errorUrl": { - "oneOf": [ - { "properties": { "errorType": { "const": "response_url" } } }, - { - "properties": { - "errorType": { - "type": "array", - "contains": { "const": "response_url" } - } - } - } - ] - }, - "errorCode": { - "oneOf": [ - { "properties": { "errorType": { "const": "status_code" } } }, - { - "properties": { - "errorType": { - "type": "array", - "contains": { "const": "status_code" } - } - } - } - ] + } + } + ] + }, + "errorUrl": { + "oneOf": [ + { "properties": { "errorType": { "const": "response_url" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "response_url" } } - }, - "allOf": [ - { - "if": { - "anyOf": [ - { "properties": { "errorType": { "const": "message" } } }, - { - "properties": { - "errorType": { - "type": "array", - "contains": { "const": "message" } - } - } - } - ] - }, - "then": { "required": ["errorMsg"] } - }, - { - "if": { - "anyOf": [ - { "properties": { "errorType": { "const": "response_url" } } }, - { - "properties": { - "errorType": { - "type": "array", - "contains": { "const": "response_url" } - } - } - } - ] - }, - "then": { "required": ["errorUrl"] } + } + } + ] + }, + "errorCode": { + "oneOf": [ + { "properties": { "errorType": { "const": "status_code" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "status_code" } + } + } + } + ] + } + }, + "allOf": [ + { + "if": { + "anyOf": [ + { "properties": { "errorType": { "const": "message" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "message" } + } + } + } + ] + }, + "then": { "required": ["errorMsg"] } + }, + { + "if": { + "anyOf": [ + { "properties": { "errorType": { "const": "response_url" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "response_url" } + } } - ], - "additionalProperties": false + } + ] + }, + "then": { "required": ["errorUrl"] } } - }, - "additionalProperties": false, - "$defs": { - "tag": { "type": "string", "enum": ["adult", "gaming"] } + ], + "additionalProperties": false } -} \ No newline at end of file + }, + "additionalProperties": false, + "$defs": { + "tag": { "type": "string", "enum": ["adult", "gaming"] } + } +} From 818cab7f5baa309e744c0677f347c33eefe97329 Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Tue, 7 Oct 2025 02:45:04 +0530 Subject: [PATCH 4/5] fix: Remove duplicate code block --- sherlock_project/sherlock.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 0365d4e2d..9316edb03 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -779,10 +779,7 @@ def main(): if not args.nsfw: sites.remove_nsfw_sites(do_not_remove=args.site_list) - # Create original dictionary from SitesInformation() object. - # Eventually, the rest of the code will be updated to use the new object - # directly, but this will glue the two pieces together. - site_data_all = {site.name: site.information for site in sites} + # Create original dictionary from SitesInformation() object. # Eventually, the rest of the code will be updated to use the new object # directly, but this will glue the two pieces together. From 184681eb06a98b42bab08798d7a7ba857fbc9eba Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Wed, 8 Oct 2025 19:31:46 +0530 Subject: [PATCH 5/5] Update data.schema.json Remove deprecated username_unclaimed from schema --- sherlock_project/resources/data.schema.json | 1 - 1 file changed, 1 deletion(-) diff --git a/sherlock_project/resources/data.schema.json b/sherlock_project/resources/data.schema.json index c63efae4e..cd0fb3a85 100644 --- a/sherlock_project/resources/data.schema.json +++ b/sherlock_project/resources/data.schema.json @@ -16,7 +16,6 @@ "urlMain": { "type": "string" }, "urlProbe": { "type": "string" }, "username_claimed": { "type": "string" }, - "username_unclaimed": { "type": "string" }, "regexCheck": { "type": "string" }, "isNSFW": { "type": "boolean" }, "headers": { "type": "object" },