Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Continuous-integration workflow: lints the code base and runs the test
# suite (minus tests selected by the "online" keyword) on several Pythons.
name: CI

# Triggered by every pull request and by pushes to the master branch.
on:
pull_request:
push:
branches: [ master ]

# One concurrency group per git ref; a newer run for the same ref cancels
# the run that is still in progress.
concurrency:
group: ci-${{ github.ref }}
cancel-in-progress: true

jobs:
# Static analysis with a pinned ruff release on Python 3.12.
lint:
runs-on: ubuntu-latest
# The job only needs to read the repository contents.
permissions:
contents: read
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'pip'
- run: pip install ruff==0.6.9
- run: ruff check .

# Test suite, run once per Python version listed in the matrix.
tests:
strategy:
matrix:
python-version: ['3.9','3.10','3.11','3.12']
runs-on: ubuntu-latest
# The job only needs to read the repository contents.
permissions:
contents: read
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
# Editable install of the project together with its "dev" extra.
- run: pip install -e .[dev]
# -k "not online" deselects tests whose keywords match "online"
# (presumably the network-dependent tests; confirm against the test markers).
- run: pytest -q -k "not online"


16 changes: 16 additions & 0 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Smoke-test workflow: checks that the Docker image still builds.
name: Docker Build

# Triggered by pushes to the master and rc branches only.
on:
push:
branches: [ master, rc ]

jobs:
build:
runs-on: ubuntu-latest
# The job only needs to read the repository contents.
permissions:
contents: read
steps:
- uses: actions/checkout@v4
# Build the image from the repository root and tag it locally as
# sherlock:test; no step in this workflow pushes the image anywhere.
- run: docker build -t sherlock:test .


22 changes: 22 additions & 0 deletions .github/workflows/nightly-online.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Scheduled workflow that runs only the tests marked "online".
name: Nightly Online Tests

on:
schedule:
# Minute 0, hour 3, every day (GitHub evaluates cron schedules in UTC).
- cron: '0 3 * * *'
# Also allow the workflow to be started manually.
workflow_dispatch:

jobs:
online:
runs-on: ubuntu-latest
# The job only needs to read the repository contents.
permissions:
contents: read
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'pip'
# Editable install of the project together with its "dev" extra.
- run: pip install -e .[dev]
# -m online selects tests carrying the "online" marker;
# --maxfail=1 stops the session after the first failure.
- run: pytest -q -m online --maxfail=1


13 changes: 12 additions & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,23 @@ sherlock user1 user2 user3
```

Accounts found will be stored in an individual text file with the corresponding username (e.g. ```user123.txt```).
Use `--no-txt` to skip creating the `.txt` file.

### Output options

- **Disable txt output**: `--no-txt`
- **Write to a specific file (single user)**: `--output FILE`
- **Write per-user files into a folder (multiple users)**: `--folderoutput DIR`
- **Export CSV**: `--csv` (creates `username.csv`)
- **Export Excel**: `--xlsx` (creates `username.xlsx`)

```console
$ sherlock --help
usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT]
[--output OUTPUT] [--tor] [--unique-tor] [--csv] [--xlsx]
[--site SITE_NAME] [--proxy PROXY_URL] [--json JSON_FILE]
[--timeout TIMEOUT] [--print-all] [--print-found] [--no-color]
[--browse] [--local] [--nsfw]
[--browse] [--local] [--nsfw] [--no-txt] [--ignore-exclusions]
USERNAMES [USERNAMES ...]

Sherlock: Find Usernames Across Social Networks (Version 0.14.3)
Expand Down Expand Up @@ -96,6 +105,8 @@ optional arguments:
--browse, -b Browse to all results on default browser.
--local, -l Force the use of the local data.json file.
--nsfw Include checking of NSFW sites from default list.
--no-txt Disable creation of a txt file
--ignore-exclusions Ignore upstream exclusions (may return more false positives)
```
## Apify Actor Usage [![Sherlock Actor](https://apify.com/actor-badge?actor=netmilk/sherlock)](https://apify.com/netmilk/sherlock?fpr=sherlock)

Expand Down
42 changes: 40 additions & 2 deletions sherlock_project/sherlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import csv
import signal
import pandas as pd
import os
import re
from argparse import ArgumentParser, RawDescriptionHelpFormatter
Expand All @@ -27,7 +26,9 @@
from typing import Optional

import requests
from requests.adapters import HTTPAdapter
from requests_futures.sessions import FuturesSession
from urllib3.util.retry import Retry

from sherlock_project.__init__ import (
__longname__,
Expand Down Expand Up @@ -110,6 +111,35 @@ def response_time(resp, *args, **kwargs):
)


def _mount_session_with_retries(session: requests.Session) -> requests.Session:
    """Attach a retrying, pooled HTTP adapter to *session* and return it.

    A single adapter is mounted for both the ``http://`` and ``https://``
    prefixes. It retries up to three times with a backoff factor of 0.5 on
    the listed status codes, for the listed HTTP methods only, and hands
    back the last response instead of raising once retries are exhausted.

    Args:
        session: The requests session to configure; it is modified in place.

    Returns:
        The same session object, for call-chaining convenience.
    """
    retry_policy = Retry(
        total=3,
        backoff_factor=0.5,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["HEAD", "GET", "OPTIONS", "POST", "PUT"],
        raise_on_status=False,
    )
    pooled_adapter = HTTPAdapter(
        max_retries=retry_policy,
        pool_connections=100,
        pool_maxsize=100,
    )
    # The same adapter instance serves both URL schemes.
    for url_prefix in ("http://", "https://"):
        session.mount(url_prefix, pooled_adapter)
    return session


def _is_valid_proxy_url(proxy_url: str) -> bool:
"""Basic validation for proxy URLs.

Accept common schemes and require a netloc.
"""
from urllib.parse import urlparse

parsed = urlparse(proxy_url)
if parsed.scheme not in {"http", "https", "socks5", "socks5h", "socks4"}:
return False
if not parsed.netloc:
return False
return True

def get_response(request_future, error_type, social_network):
# Default for Response object if some failure occurs.
response = None
Expand Down Expand Up @@ -237,6 +267,9 @@ def sherlock(
underlying_session = requests.session()
underlying_request = requests.Request()

# Mount retries/connection pooling for robustness and performance
underlying_session = _mount_session_with_retries(underlying_session)

# Limit number of workers to 20.
# This is probably vastly overkill.
if len(site_data) >= 20:
Expand Down Expand Up @@ -756,12 +789,15 @@ def main():
print(f"A problem occurred while checking for an update: {error}")

# Argument check
# TODO regex check on args.proxy
if args.tor and (args.proxy is not None):
raise Exception("Tor and Proxy cannot be set at the same time.")

# Make prompts
if args.proxy is not None:
if not _is_valid_proxy_url(args.proxy):
raise ArgumentTypeError(
f"Invalid proxy URL: {args.proxy}. Expected scheme://host:port with scheme in http, https, socks4, socks5, socks5h."
)
print("Using the proxy: " + args.proxy)

if args.tor or args.unique_tor:
Expand Down Expand Up @@ -942,6 +978,8 @@ def main():
]
)
if args.xlsx:
# Lazy import to reduce startup time and optional dependency overhead
import pandas as pd
usernames = []
names = []
url_main = []
Expand Down
19 changes: 19 additions & 0 deletions tests/test_ux.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,25 @@ def test_wildcard_username_expansion():
assert sherlock.multiple_usernames('test{?}test') == ["test_test" , "test-test" , "test.test"]


def test_no_txt_flag(tmp_path, monkeypatch):
    """Ensure that --no-txt prevents creating the txt output file."""
    username = "totallyfakeuserforunittest"
    # Run inside a temporary working directory so the repo is not polluted;
    # monkeypatch restores the previous directory when the test finishes.
    monkeypatch.chdir(tmp_path)
    try:
        Interactives.run_cli(f"--no-txt {username}")
    except Exception as error:
        # Network failures are tolerated, but the CLI rejecting the flag is
        # not: without this check the test would pass vacuously, because a
        # run that never starts cannot create the file either.
        assert "unrecognized arguments" not in str(error)
    assert not (tmp_path / f"{username}.txt").exists()


@pytest.mark.parametrize('cliargs', [
'',
'--site urghrtuight --egiotr',
Expand Down