From f9c6574d08325a95cd4be4a33dfc601bb2faa310 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Mon, 15 Jan 2024 03:49:44 +0100 Subject: [PATCH] First draft of AyloAPI --- .github/workflows/main.yml | 40 + .gitignore | 5 + build_site.sh | 89 ++ scrapers/AyloAPI/README.md | 34 + scrapers/AyloAPI/aylo_tokens.json | 446 +++++++ scrapers/AyloAPI/config.py | 11 + scrapers/AyloAPI/domains.py | 67 + scrapers/AyloAPI/package | 2 + scrapers/AyloAPI/scrape.py | 907 +++++++++++++ scrapers/AyloAPI/slugger.py | 82 ++ scrapers/AyloAPI/test_cases.md | 514 ++++++++ scrapers/Babes.yml | 64 + scrapers/BangBros/BangBros.py | 90 ++ scrapers/BangBros/BangBros.yml | 60 + scrapers/Brazzers/Brazzers.py | 77 ++ scrapers/Brazzers/Brazzers.yml | 61 + scrapers/Bromo/Bromo.py | 51 + scrapers/Bromo/Bromo.yml | 60 + scrapers/CzechHunter/CzechHunter.py | 69 + scrapers/CzechHunter/CzechHunter.yml | 68 + scrapers/Deviante/Deviante.py | 89 ++ scrapers/Deviante/Deviante.yml | 80 ++ .../DigitalPlayground/DigitalPlayground.py | 61 + .../DigitalPlayground/DigitalPlayground.yml | 64 + scrapers/Erito.yml | 64 + scrapers/FakeHub/FakeHub.py | 74 ++ scrapers/FakeHub/FakeHub.yml | 72 ++ scrapers/GayWire/GayWire.py | 85 ++ scrapers/GayWire/GayWire.yml | 58 + scrapers/HentaiPros.yml | 64 + scrapers/Men/Men.py | 82 ++ scrapers/Men/Men.yml | 100 ++ scrapers/MetroHD/MetroHD.py | 85 ++ scrapers/MetroHD/MetroHD.yml | 73 ++ .../MileHighMedia_BiandTrans.py | 73 ++ .../MileHighMedia_BiandTrans.yml | 64 + .../MileHighMedia_Gay/MileHighMedia_Gay.py | 67 + .../MileHighMedia_Gay/MileHighMedia_Gay.yml | 64 + .../MileHighMedia_Straight.py | 93 ++ .../MileHighMedia_Straight.yml | 88 ++ scrapers/Mofos/Mofos.py | 73 ++ scrapers/Mofos/Mofos.yml | 76 ++ scrapers/PropertySex/PropertySex.py | 66 + scrapers/PropertySex/PropertySex.yml | 64 + scrapers/RealityDudes/RealityDudes.py | 67 + scrapers/RealityDudes/RealityDudes.yml | 68 + scrapers/RealityKings/RealityKings.py | 71 + scrapers/RealityKings/RealityKings.yml | 172 +++ 
scrapers/SeanCody/SeanCody.py | 53 + scrapers/SeanCody/SeanCody.yml | 60 + scrapers/SexyHub/SexyHub.py | 71 + scrapers/SexyHub/SexyHub.yml | 68 + scrapers/Squirted.yml | 65 + scrapers/TransAngels.yml | 72 ++ scrapers/TrueAmateurs.yml | 64 + scrapers/Tube8Vip/Tube8Vip.py | 58 + scrapers/Tube8Vip/Tube8Vip.yml | 60 + scrapers/Twistys/Twistys.py | 82 ++ scrapers/Twistys/Twistys.yml | 64 + scrapers/WhyNotBi/WhyNotBi.py | 54 + scrapers/WhyNotBi/WhyNotBi.yml | 60 + scrapers/py_common/config.py | 7 + scrapers/py_common/graphql.py | 1149 +++++++++++++++++ scrapers/py_common/log.py | 39 + scrapers/py_common/package | 2 + scrapers/py_common/types.py | 116 ++ scrapers/py_common/util.py | 258 ++++ 67 files changed, 7356 insertions(+) create mode 100644 .github/workflows/main.yml create mode 100644 .gitignore create mode 100755 build_site.sh create mode 100644 scrapers/AyloAPI/README.md create mode 100644 scrapers/AyloAPI/aylo_tokens.json create mode 100644 scrapers/AyloAPI/config.py create mode 100644 scrapers/AyloAPI/domains.py create mode 100644 scrapers/AyloAPI/package create mode 100644 scrapers/AyloAPI/scrape.py create mode 100644 scrapers/AyloAPI/slugger.py create mode 100644 scrapers/AyloAPI/test_cases.md create mode 100644 scrapers/Babes.yml create mode 100644 scrapers/BangBros/BangBros.py create mode 100644 scrapers/BangBros/BangBros.yml create mode 100644 scrapers/Brazzers/Brazzers.py create mode 100644 scrapers/Brazzers/Brazzers.yml create mode 100644 scrapers/Bromo/Bromo.py create mode 100644 scrapers/Bromo/Bromo.yml create mode 100644 scrapers/CzechHunter/CzechHunter.py create mode 100644 scrapers/CzechHunter/CzechHunter.yml create mode 100644 scrapers/Deviante/Deviante.py create mode 100644 scrapers/Deviante/Deviante.yml create mode 100644 scrapers/DigitalPlayground/DigitalPlayground.py create mode 100644 scrapers/DigitalPlayground/DigitalPlayground.yml create mode 100644 scrapers/Erito.yml create mode 100644 scrapers/FakeHub/FakeHub.py create mode 100644 
scrapers/FakeHub/FakeHub.yml create mode 100644 scrapers/GayWire/GayWire.py create mode 100644 scrapers/GayWire/GayWire.yml create mode 100644 scrapers/HentaiPros.yml create mode 100644 scrapers/Men/Men.py create mode 100644 scrapers/Men/Men.yml create mode 100644 scrapers/MetroHD/MetroHD.py create mode 100644 scrapers/MetroHD/MetroHD.yml create mode 100644 scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.py create mode 100644 scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.yml create mode 100644 scrapers/MileHighMedia_Gay/MileHighMedia_Gay.py create mode 100644 scrapers/MileHighMedia_Gay/MileHighMedia_Gay.yml create mode 100644 scrapers/MileHighMedia_Straight/MileHighMedia_Straight.py create mode 100644 scrapers/MileHighMedia_Straight/MileHighMedia_Straight.yml create mode 100644 scrapers/Mofos/Mofos.py create mode 100644 scrapers/Mofos/Mofos.yml create mode 100644 scrapers/PropertySex/PropertySex.py create mode 100644 scrapers/PropertySex/PropertySex.yml create mode 100644 scrapers/RealityDudes/RealityDudes.py create mode 100644 scrapers/RealityDudes/RealityDudes.yml create mode 100644 scrapers/RealityKings/RealityKings.py create mode 100644 scrapers/RealityKings/RealityKings.yml create mode 100644 scrapers/SeanCody/SeanCody.py create mode 100644 scrapers/SeanCody/SeanCody.yml create mode 100644 scrapers/SexyHub/SexyHub.py create mode 100644 scrapers/SexyHub/SexyHub.yml create mode 100644 scrapers/Squirted.yml create mode 100644 scrapers/TransAngels.yml create mode 100644 scrapers/TrueAmateurs.yml create mode 100644 scrapers/Tube8Vip/Tube8Vip.py create mode 100644 scrapers/Tube8Vip/Tube8Vip.yml create mode 100644 scrapers/Twistys/Twistys.py create mode 100644 scrapers/Twistys/Twistys.yml create mode 100644 scrapers/WhyNotBi/WhyNotBi.py create mode 100644 scrapers/WhyNotBi/WhyNotBi.yml create mode 100644 scrapers/py_common/config.py create mode 100644 scrapers/py_common/graphql.py create mode 100644 scrapers/py_common/log.py create mode 100644 
scrapers/py_common/package create mode 100644 scrapers/py_common/types.py create mode 100644 scrapers/py_common/util.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..888910b --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,40 @@ +name: Deploy index to Github Pages + +on: + push: + branches: [ master ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +jobs: + build: + runs-on: ubuntu-22.04 + steps: + - name: Checkout master + uses: actions/checkout@v2 + with: + path: master + ref: master + fetch-depth: '0' + - run: | + cd master + ./build_site.sh ../_site/ + - uses: actions/upload-pages-artifact@v2 + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-22.04 + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..04faab0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +# Scraper generated files +*.json + +# Index build artifact +/_site \ No newline at end of file diff --git a/build_site.sh b/build_site.sh new file mode 100755 index 0000000..4987225 --- /dev/null +++ b/build_site.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# builds a repository of scrapers +# outputs to _site with the following structure: +# index.yml +# .zip +# Each zip file contains the scraper.yml file and any other files in the same directory + +outdir="$1" +if [ -z "$outdir" ]; then + outdir="_site" +fi + +rm -rf "$outdir" +mkdir -p "$outdir" + +buildScraper() +{ + f=$1 + dir=$(dirname "$f") + + # get the scraper id from the filename + scraper_id=$(basename "$f" .yml) + versionFile=$f + if [ "$scraper_id" == "package" ]; then + scraper_id=$(basename "$dir") + fi + + if [ "$dir" 
!= "./scrapers" ]; then + versionFile="$dir" + fi + + echo "Processing $scraper_id" + + # create a directory for the version + version=$(git log -n 1 --pretty=format:%h -- "$versionFile") + updated=$(TZ=UTC0 git log -n 1 --date="format-local:%F %T" --pretty=format:%ad -- "$versionFile") + + # create the zip file + # copy other files + zipfile=$(realpath "$outdir/$scraper_id.zip") + + name=$(grep "^name:" "$f" | cut -d' ' -f2- | sed -e 's/\r//' -e 's/^"\(.*\)"$/\1/') + ignore=$(grep "^# ignore:" "$f" | cut -c 10- | sed -e 's/\r//') + dep=$(grep "^# requires:" "$f" | cut -c 12- | sed -e 's/\r//') + + # always ignore package file + ignore="-x $ignore package" + + pushd "$dir" > /dev/null + if [ "$dir" != "./scrapers" ]; then + zip -r "$zipfile" . ${ignore} > /dev/null + else + zip "$zipfile" "$scraper_id.yml" > /dev/null + fi + popd > /dev/null + + # write to spec index + echo "- id: $scraper_id + name: $name + version: $version + date: $updated + path: $scraper_id.zip + sha256: $(sha256sum "$zipfile" | cut -d' ' -f1)" >> "$outdir"/index.yml + + # handle dependencies + if [ ! -z "$dep" ]; then + echo " requires:" >> "$outdir"/index.yml + for d in ${dep//,/ }; do + echo " - $d" >> "$outdir"/index.yml + done + fi + + echo "" >> "$outdir"/index.yml +} + +# find all yml files in ./scrapers - these are packages individually +for f in ./scrapers/*.yml; do + buildScraper "$f" +done + +find ./scrapers/ -mindepth 2 -name *.yml -print0 | while read -d $'\0' f; do + buildScraper "$f" +done + +# handle dependency packages +find ./scrapers/ -mindepth 2 -name package -print0 | while read -d $'\0' f; do + buildScraper "$f" +done diff --git a/scrapers/AyloAPI/README.md b/scrapers/AyloAPI/README.md new file mode 100644 index 0000000..9a7d6e0 --- /dev/null +++ b/scrapers/AyloAPI/README.md @@ -0,0 +1,34 @@ +# The Aylo API scraper + +This is arguably the biggest scraper in the repo and covers a _lot_ of networks and studios. 
It is +composed of one main file that contains the functions necessary to scrape scenes, movies, galleries +and performers from the Aylo API along with a few supporting files with functions that handle things +like constructing URL slugs and caching instance tokens. + +Design goals: + +- Split scrapers that can handle the individual complexities of subnetworks without overcomplicating the main scraper +- Easy to modify and understand: documentation, examples + +These functions are designed to be open for extension, but closed to modification: but what does this mean? +The networks and studios in the Aylo API differ in how they construct their URLs and even how +their parent/child studio relationships are expressed these functions could easily take on a lot of +complexity if they were to handle every special case. Instead these scraping functions return their +results in a standard format that works for most studios while also optionally taking a postprocessing +function that callers can supply to handle their special requirements. + +The standard URL formats that can vary: +scenes: `https://www..com/scene//` +movies: `https://www..com/movie//` +performers: `https://www..com/model//` + +`brand-domain` is based on the parent studio: `bangbros` for Bang Bros, `gaywire` for Gay Wire, +`bigstr` for BigStr (which has since consolidated under the Czech Hunter name, so those URLs are wrong!) + +Uses the `parse_args` helper from [py_common](../py_common/util.py) +Developed to be ergonomic for testing and integrating into other Python scripts + +The simplest case is exemplified by the Babes network: they use the standard URL formats and their +parent studio domain `www.babes.com` is correct for all substudios. Their scraper does not need +to make any changes to the results returned by the API, so their scraper is fully defined in [Babes.yml](../Babes.yml). +The only thing it needs to do is specify which domains it should search which can be done inline. 
diff --git a/scrapers/AyloAPI/aylo_tokens.json b/scrapers/AyloAPI/aylo_tokens.json new file mode 100644 index 0000000..078717a --- /dev/null +++ b/scrapers/AyloAPI/aylo_tokens.json @@ -0,0 +1,446 @@ +{ + "8thstreetlatinas": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "babes": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjczMzgxLCJicmFuZCI6InNwaWNldmlkcyIsImhvc3RuYW1lIjoid3d3LmJhYmVzLmNvbSJ9.zS5b3PALhFL7D_PEpIXCLaXZJhH-BrguAHZrZjnxzEI", + "date": "2024-01-15" + }, + "bangbros": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY5ODIxLCJicmFuZCI6ImJhbmdicm9zIiwiaG9zdG5hbWUiOiJiYW5nYnJvcy5jb20ifQ.bNXyzjjpjP2k8FzITICxkrXP9Ful5EdxdUUade9bXL4", + "date": "2024-01-15" + }, + "biempire": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6NTQ1NzEsImJyYW5kIjoiYmllbXBpcmUiLCJob3N0bmFtZSI6Ind3dy5iaWVtcGlyZS5jb20ifQ.km_v_F0Ku5V0N2d8k0wM9Afbtda2leMR-aG4XPepq0E", + "date": "2024-01-15" + }, + "bigdicksatschool": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE2MDcxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3LmJpZ2RpY2tzYXRzY2hvb2wuY29tIn0.7zQbVdCUXDbN4AYwFgXFZbJjjg5vMTTIxEzaY5bAgLM", + "date": "2024-01-15" + }, + "bignaturals": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "bigstr": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjYwMDYxLCJicmFuZCI6ImJpZ3N0ciIsImhvc3RuYW1lIjoid3d3LmJpZ3N0ci5jb20ifQ.DWewmP6k37qRy9k-r_jZuiqMWTOyye0gUJOppx2Mdqk", + "date": "2024-01-15" + }, + "brazzers": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjQ5NjUxLCJicmFuZCI6ImJyYXp6ZXJzIiwiaG9zdG5hbWUiOiJ3d3cuYnJhenplcnMuY29tIn0.jGc4ikH4gH7_xqfVMa60xjWHvlSeozKOkKSNYOa6B4c", + "date": "2024-01-15" + }, + "brazzersnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MTU0NzEyLCJicmFuZCI6ImJyYXp6ZXJzIiwiaG9zdG5hbWUiOiJ3d3cuYnJhenplcnNuZXR3b3JrLmNvbSJ9.VfmOpqYZMGrC2iqg5FVLcvSneoALHDf9lf0eNRAaiOI", + "date": "2024-01-15" + }, + "bromo": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjUzMzQxLCJicmFuZCI6ImJyb21vIiwiaG9zdG5hbWUiOiJ3d3cuYnJvbW8uY29tIn0.GLgFVXJR9Km7nBmXeoL7fqUl4uu2c_zWHHuisd0DM6A", + "date": "2024-01-15" + }, + "captainstabbin": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "cumfiesta": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "czechhunter": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjYxMjIxLCJicmFuZCI6ImJpZ3N0ciIsImhvc3RuYW1lIjoid3d3LmN6ZWNoaHVudGVyLmNvbSJ9.gx3mU6dpUyMy431jBnpVQpgCY4l_qt6D579VZ1ip_Q4", + "date": "2024-01-15" + }, + "dancingbear": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY1NTMxLCJicmFuZCI6ImRhbmNpbmdiZWFyIiwiaG9zdG5hbWUiOiJkYW5jaW5nYmVhci5jb20ifQ.IfpV6Z6x5Rcyc2GVmH-S07DOdrpoQJPYoZ1Rt0y85F4", + "date": "2024-01-15" + }, + "danejones": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MTIxMzIyLCJicmFuZCI6InNleHlodWIiLCJob3N0bmFtZSI6Ind3dy5kYW5lam9uZXMuY29tIn0.KtElsFMQDsNHvtmVeZwtk64KWlyjIqr65VZm6K7NdP0", + "date": "2024-01-15" + }, + "daredorm": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "debtdandy": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjYwMTExLCJicmFuZCI6ImJpZ3N0ciIsImhvc3RuYW1lIjoid3d3LmRlYnRkYW5keS5jb20ifQ.ZS0I_Ja59y-EK2Hph18RQqujNhW2wxWl6kbM1TNkQ5s", + "date": "2024-01-15" + }, + "deviante": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0MDAxLCJicmFuZCI6ImRldmlhbnRlIiwiaG9zdG5hbWUiOiJ3d3cuZGV2aWFudGUuY29tIn0.HsBec2kgQFDcnU9-9vvIlpzyqjYZix9vxddohpPxpes", + "date": "2024-01-15" + }, + "devianthardcore": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0OTMxLCJicmFuZCI6Im1ldHJvaGQiLCJob3N0bmFtZSI6Ind3dy5kZXZpYW50aGFyZGNvcmUuY29tIn0.RmqSPbigY16TATr8crfcmnzljwB7_pzfrro7ZUS8XZU", + "date": "2024-01-15" + }, + "digitalplayground": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjM3NDkxLCJicmFuZCI6ImRpZ2l0YWxwbGF5Z3JvdW5kIiwiaG9zdG5hbWUiOiJ3d3cuZGlnaXRhbHBsYXlncm91bmQuY29tIn0.4vpk4fdgPIT_d5fMinbIilZXAOj56VoFZbYhmyuu1V0", + "date": "2024-01-15" + }, + "digitalplaygroundnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjM3NzIxLCJicmFuZCI6ImRpZ2l0YWxwbGF5Z3JvdW5kIiwiaG9zdG5hbWUiOiJ3d3cuZGlnaXRhbHBsYXlncm91bmRuZXR3b3JrLmNvbSJ9.FQn0-msyvVbXeTGSb5S1SZYg8GlY8nDfgg_6o6h2PvQ", + "date": "2024-01-15" + }, + "dilfed": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjQ2ODkxLCJicmFuZCI6Im1pbGZlZCIsImhvc3RuYW1lIjoid3d3LmRpbGZlZC5jb20ifQ.8tfh8DMmunPKX7dpm1D_7iVC5gMYsahuf_am1VOEJu4", + "date": "2024-01-15" + }, + "dirtyscout": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjYwMDgxLCJicmFuZCI6ImJpZ3N0ciIsImhvc3RuYW1lIjoid3d3LmRpcnR5c2NvdXQuY29tIn0.ploiuF6IKt6ooK1y_61ayHLtdPmN_pjBzBa2lbxrjuQ", + "date": "2024-01-15" + }, + "doghousedigital": 
{ + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY1MDQxLCJicmFuZCI6Im1pbGVoaWdoIiwiaG9zdG5hbWUiOiJ3d3cuZG9naG91c2VkaWdpdGFsLmNvbSJ9.dV0bdvfJ0GbT9JHRL5vdByf8unCw5gx37m3zjCXpB14", + "date": "2024-01-15" + }, + "dontbreakme": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjQ3ODAxLCJicmFuZCI6Im1vZm9zIiwiaG9zdG5hbWUiOiJ3d3cubW9mb3MuY29tIn0.JI9T58u9_UUvLMjiNLAOlphpvPw63u-WOUgqzHU8lRc", + "date": "2024-01-15" + }, + "erito": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6Mjc0MTIxLCJicmFuZCI6InNwaWNldmlkcyIsImhvc3RuYW1lIjoid3d3LmVyaXRvLmNvbSJ9.lWsYLg_7OmpZmadi33l6CceV_cMKmeUWTEwwofjqciE", + "date": "2024-01-15" + }, + "eroticspice": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0NTkxLCJicmFuZCI6ImRldmlhbnRlIiwiaG9zdG5hbWUiOiJ3d3cuZXJvdGljc3BpY2UuY29tIn0.eXMD26RewCtx1GX1YeWsJQ1e8GtGWETg_AZVVo9wryU", + "date": "2024-01-15" + }, + "eurosexparties": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "fakehostel": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY1MDExLCJicmFuZCI6ImZha2VodWIiLCJob3N0bmFtZSI6Ind3dy5mYWtlaG9zdGVsLmNvbSJ9.s0jj6t0toxPIXJLOpQXAiTSFrB0iapQXn9MVe25ELng", + "date": "2024-01-15" + }, + "fakehub": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU4NDkxLCJicmFuZCI6ImZha2VodWIiLCJob3N0bmFtZSI6Ind3dy5mYWtlaHViLmNvbSJ9.3RGwwANuWeQtsdEvWQ7ak8zY1_K7DJ4FyVOBlni6WfE", + "date": "2024-01-15" + }, + "fakehuboriginals": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU4NDkxLCJicmFuZCI6ImZha2VodWIiLCJob3N0bmFtZSI6Ind3dy5mYWtlaHViLmNvbSJ9.3RGwwANuWeQtsdEvWQ7ak8zY1_K7DJ4FyVOBlni6WfE", + "date": "2024-01-15" + }, + "faketaxi": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY1MDcxLCJicmFuZCI6ImZha2VodWIiLCJob3N0bmFtZSI6Ind3dy5mYWtldGF4aS5jb20ifQ.6C4ChPR6Mb5OnJxCAaCkR5yQdbjZMtSA-qKb5rlaHCk", + "date": "2024-01-15" + }, + "familyhookups": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0NzAxLCJicmFuZCI6Im1ldHJvaGQiLCJob3N0bmFtZSI6Ind3dy5mYW1pbHlob29rdXBzLmNvbSJ9.Td-UduZfxAFHEzoNOTqm8bVo813DGiQil1oxH8bL7a8", + "date": "2024-01-15" + }, + "familysinners": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0NjYxLCJicmFuZCI6Im1pbGVoaWdoIiwiaG9zdG5hbWUiOiJ3d3cuZmFtaWx5c2lubmVycy5jb20ifQ.yMNAf4vkhwLh6xL5QOUlsnFl37d3YqlBxwEl3pFtr78", + "date": "2024-01-15" + }, + "forgivemefather": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0MjUxLCJicmFuZCI6ImRldmlhbnRlIiwiaG9zdG5hbWUiOiJ3d3cuZm9yZ2l2ZW1lZmF0aGVyLmNvbSJ9.y_y8-EoEeSi_r3RuKbij_Y6PzfUe91zZqhY-atuthyo", + "date": "2024-01-15" + }, + "gaywire": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY3MzQxLCJicmFuZCI6ImdheXdpcmUiLCJob3N0bmFtZSI6ImdheXdpcmUuY29tIn0.GyaudVNowQ8Zfgk2qIVkrECDTW1VqrON2bet5s6qh1M", + "date": "2024-01-15" + }, + "gfleaks": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "girlgrind": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6Nzc4NzEsImJyYW5kIjoibWV0cm9oZCIsImhvc3RuYW1lIjoid3d3LmdpcmxncmluZC5jb20ifQ.KjfMhqI4ipdaV3Sel3RZqmzOoov2wcf6sWYrXDDlVzQ", + "date": "2024-01-15" + }, + "godsofmen": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE2MTQxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3LmdvZHNvZm1lbi5jb20ifQ.kxYerSYKBslM8koEXSz_iZeA8jMsbMKW3yS9QMHtVqI", + "date": "2024-01-15" + }, + "happytugs": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "hdlove": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "hentaipros": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE1NDAxLCJicmFuZCI6ImhlbnRhaXByb3MiLCJob3N0bmFtZSI6Ind3dy5oZW50YWlwcm9zLmNvbSJ9.7VOJ1JvjqrhJH0yNGGvdWcYJxzBOxjImompG8MWfJMM", + "date": "2024-01-15" + }, + "hentaiprosnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE2NDgxLCJicmFuZCI6ImhlbnRhaXByb3MiLCJob3N0bmFtZSI6Ind3dy5oZW50YWlwcm9zbmV0d29yay5jb20ifQ.ln-6dP1XId0xu2dyq2k1Cpm3Fkl_kYjqyNkUgpX2ig0", + "date": "2024-01-15" + }, + "hornybirds": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "hotgirlsgame": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU1NjExLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoibGFuZGluZy5ob3RnaXJsc2dhbWUuY29tIn0.eCnl7uog82wpxUjEaEEFdoe2_UApZ3vzA68_reMNaBg", + "date": "2024-01-15" + }, + "househumpers": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY4NTExLCJicmFuZCI6InByb3BlcnR5c2V4IiwiaG9zdG5hbWUiOiJ3d3cuaG91c2VodW1wZXJzLmNvbSJ9.2gHC2_XJ4lm40MGEzHQwdEje_ZKpbgc59jrXpP6_Ydg", + "date": "2024-01-15" + }, + "iconmale": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU4NzkxLCJicmFuZCI6Imljb25tYWxlIiwiaG9zdG5hbWUiOiJ3d3cuaWNvbm1hbGUuY29tIn0.sK5gTSZgfUf4VzuB7PknJW2K5jVMSlE1dsX7XDZ4IcE", + "date": "2024-01-15" + }, + "iknowthatgirl": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6Mjg5NjEsImJyYW5kIjoibW9mb3MiLCJob3N0bmFtZSI6Ind3dy5pa25vd3RoYXRnaXJsLmNvbSJ9.YSw0LjG1YX-U-J6NU96DeJZb83nb_NJNMfha7cFUxWk", + "date": "2024-01-15" + }, + "jizzorgy": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE1OTYxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3Lmppenpvcmd5LmNvbSJ9.aCIl5dCJefQFMTp-l9HRtzWMFfPou8mrLv2k15tYXUA", + "date": "2024-01-15" + }, + "kinkyspa": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0NTYxLCJicmFuZCI6Im1ldHJvaGQiLCJob3N0bmFtZSI6Ind3dy5raW5reXNwYS5jb20ifQ.p-zOa_GpaEGO6aTd5Cgkg1_qC_9_ms1z5ZbF0y__uZo", + "date": "2024-01-15" + }, + "lesbea": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MTIxMzAyLCJicmFuZCI6InNleHlodWIiLCJob3N0bmFtZSI6Ind3dy5sZXNiZWEuY29tIn0._sd7dk3ImM1uiFXDUBu39EGMEjZMoypaxhVF02RPS_A", + "date": "2024-01-15" + }, + "letstryanal": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjQ3ODAxLCJicmFuZCI6Im1vZm9zIiwiaG9zdG5hbWUiOiJ3d3cubW9mb3MuY29tIn0.JI9T58u9_UUvLMjiNLAOlphpvPw63u-WOUgqzHU8lRc", + "date": "2024-01-15" + }, + "lilhumpers": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "lookathernow": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6NzYzNTEsImJyYW5kIjoibG9va2F0aGVybm93IiwiaG9zdG5hbWUiOiJ3d3cubG9va2F0aGVybm93LmNvbSJ9.BLuaTEc49eZS-eHfbxiGhyu_QoTxHYJ8wyv1Zt5yD8Q", + "date": "2024-01-15" + }, + "loveherass": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0NTMxLCJicmFuZCI6ImRldmlhbnRlIiwiaG9zdG5hbWUiOiJ3d3cubG92ZWhlcmFzcy5jb20ifQ.vTRaco9Dhj2TTi6Rf-ZafnMBo3zjSW9c_gZWVw4swXU", + "date": "2024-01-15" + }, + "men": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY2ODUxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3Lm1lbi5jb20ifQ.ImVPqvkOD37Su1qNC8aEV981wIVV8LmqvkCFNGcocPw", + "date": "2024-01-15" + }, + "menofuk": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE2MTYxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3Lm1lbm9mdWsuY29tIn0.ZXJtw-4F6ICskcyCoE-p_yd1eCghiRk1-gd6a_i95Bo", + "date": "2024-01-15" + }, + "metrohd": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0MjQxLCJicmFuZCI6Im1ldHJvaGQiLCJob3N0bmFtZSI6Ind3dy5tZXRyb2hkLmNvbSJ9.ilH3SK5_MSxAfa5CQP6sbDcesRqbqpWpicAH7Gwvt_8", + "date": "2024-01-15" + }, + "mikeinbrazil": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "milehighmedia": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU4NjkxLCJicmFuZCI6Im1pbGVoaWdoIiwiaG9zdG5hbWUiOiJ3d3cubWlsZWhpZ2htZWRpYS5jb20ifQ.q0pKFv6aEFTzGASXzQr-YyYXiGSGz9KwMrKtHTi4f_g", + "date": "2024-01-15" + }, + "milfed": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MTM5NjEyLCJicmFuZCI6Im1pbGZlZCIsImhvc3RuYW1lIjoid3d3Lm1pbGZlZC5jb20ifQ._DHHsh-87fkrEqIWafySr7FUG7-kzuW_TS0_7BxH83I", + "date": "2024-01-15" + }, + "milfhunter": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "mofos": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjQ3ODAxLCJicmFuZCI6Im1vZm9zIiwiaG9zdG5hbWUiOiJ3d3cubW9mb3MuY29tIn0.JI9T58u9_UUvLMjiNLAOlphpvPw63u-WOUgqzHU8lRc", + "date": "2024-01-15" + }, + "mofosnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjYzMTMxLCJicmFuZCI6Im1vZm9zIiwiaG9zdG5hbWUiOiJ3d3cubW9mb3NuZXR3b3JrLmNvbSJ9.X_wDN3V9igZBWGcTX3SupS5HZZzNRn2Ds0C1pn--ms4", + "date": "2024-01-15" + }, + "momsbangteens": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "momslickteens": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "moneytalks": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "monstercurves": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "mygf": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0MDYxLCJicmFuZCI6Im15Z2YiLCJob3N0bmFtZSI6Im15Z2YuY29tIn0.oCHODo0eGJvIUMnuFffpdOmMv82luev6JJFLbeXRMjg", + "date": "2024-01-15" + }, + "noirmale": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU4NjcxLCJicmFuZCI6Im5vaXJtYWxlIiwiaG9zdG5hbWUiOiJ3d3cubm9pcm1hbGUuY29tIn0.kc-B0tGsVuh29ZXCFY5avsvXpaEqRexmWAj1nCNwraA", + "date": "2024-01-15" + }, + "papi": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU5MTExLCJicmFuZCI6InBhcGkiLCJob3N0bmFtZSI6Ind3dy5wYXBpLmNvbSJ9.wPrlJoRc4DAnBnSt6IVpU6E1Chj2qqOgmRinAvNbf-A", + "date": "2024-01-15" + }, + "prettydirtyteens": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0NDQxLCJicmFuZCI6ImRldmlhbnRlIiwiaG9zdG5hbWUiOiJ3d3cucHJldHR5ZGlydHl0ZWVucy5jb20ifQ.4cuiTahtFC6O5epNS3YWEjEZbmGBQdCfe8mBDjfYZbg", + "date": "2024-01-15" + }, + "propertysex": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU4NjAxLCJicmFuZCI6InByb3BlcnR5c2V4IiwiaG9zdG5hbWUiOiJ3d3cucHJvcGVydHlzZXguY29tIn0.8pPJpStZqSgdVNi9_QKVHvy1ojsKt4FxMjNl9v86Z8M", + "date": "2024-01-15" + }, + "publicagent": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MTIxMjYyLCJicmFuZCI6ImZha2VodWIiLCJob3N0bmFtZSI6Ind3dy5wdWJsaWNhZ2VudC5jb20ifQ.ltinL04qSLgR9Bi74oaWd9e_crv9CcQtJ03zcdAii9g", + "date": "2024-01-15" + }, + "publicpickups": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjQ3ODAxLCJicmFuZCI6Im1vZm9zIiwiaG9zdG5hbWUiOiJ3d3cubW9mb3MuY29tIn0.JI9T58u9_UUvLMjiNLAOlphpvPw63u-WOUgqzHU8lRc", + "date": "2024-01-15" + }, + "pure18": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "realitydudes": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY5MTAxLCJicmFuZCI6InJlYWxpdHlkdWRlcyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlkdWRlcy5jb20ifQ.Hm0N1VgqvCTsau_R6434LfJE9KNWyqCGsfC6d_gNAGk", + "date": "2024-01-15" + }, + "realitydudesnetwork": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjUzMzIxLCJicmFuZCI6InJlYWxpdHlkdWRlcyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlkdWRlc25ldHdvcmsuY29tIn0.QM8_ROVFm7is6W7XBDyKlZ4twluanCBrOAmVd0rIOls", + "date": "2024-01-15" + }, + "realityjunkies": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0NzkxLCJicmFuZCI6Im1pbGVoaWdoIiwiaG9zdG5hbWUiOiJ3d3cucmVhbGl0eWp1bmtpZXMuY29tIn0.qlMn3jXPcgtHcu0GcIKfUNsDTKsSbbeHVcVEJWsYRPc", + "date": "2024-01-15" + }, + "realitykings": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "recklessinmiami": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "rk": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjQxMTUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJrLmNvbSJ9.aeX0NldDFgBAgGHLIufoGtE-P3dSRKR135H-_1Ro_XY", + "date": "2024-01-15" + }, + "roundandbrown": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "seancody": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjUyNDYxLCJicmFuZCI6InNlYW5jb2R5IiwiaG9zdG5hbWUiOiJ3d3cuc2VhbmNvZHkuY29tIn0.cnIHVVauhNOWSpLpjUtmEMuXHCkj9_2zK1ifpN4S2_c", + "date": "2024-01-15" + }, + "sexworking": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY1MjQxLCJicmFuZCI6ImRldmlhbnRlIiwiaG9zdG5hbWUiOiJ3d3cuc2V4d29ya2luZy5jb20ifQ.ynh5di-Zu_0x7-wVw9nywZXyC_l_OlmRTRxusruR14o", + "date": "2024-01-15" + }, + "sexyhub": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU4ODUxLCJicmFuZCI6InNleHlodWIiLCJob3N0bmFtZSI6Ind3dy5zZXh5aHViLmNvbSJ9.fNKZk-SHhddV7IrnSg5IjISfCwBIUo348waRBFZGGak", + "date": "2024-01-15" + }, + "shewillcheat": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0NzIxLCJicmFuZCI6Im1ldHJvaGQiLCJob3N0bmFtZSI6Ind3dy5zaGV3aWxsY2hlYXQuY29tIn0.7RuI8MaPxAWfStp2CiMBCI9hqSFP7ooxAxRvNOQGEoE", + "date": "2024-01-15" + }, + "sneakysex": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.qTJlhd_k5acq8xEJamliq71q6_yHQRoK_16sbPWgzb0", + "date": "2024-01-15" + }, + "squirted": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY0MTUxLCJicmFuZCI6InNxdWlydGVkIiwiaG9zdG5hbWUiOiJ3d3cuc3F1aXJ0ZWQuY29tIn0.lsVcm2cIyLhCZXTRvPAMTD217DgOdx_ApmwcOooLCQY", + "date": "2024-01-15" + }, + "str8togay": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE1ODYxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3LnN0cjh0b2dheS5jb20ifQ.Tkse0n4sqcqBma004UCEGDWo1P5-AwbY2K0azbyqX5I", + "date": "2024-01-15" + }, + "sweetheartvideo": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU4NzMxLCJicmFuZCI6Im1pbGVoaWdoIiwiaG9zdG5hbWUiOiJ3d3cuc3dlZXRoZWFydHZpZGVvLmNvbSJ9.6PS8GlWta5TFtFcZUyBd3-C0TUmDg0AzmS1g-OKDpHI", + "date": "2024-01-15" + }, + "sweetsinner": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU4NzExLCJicmFuZCI6Im1pbGVoaWdoIiwiaG9zdG5hbWUiOiJ3d3cuc3dlZXRzaW5uZXIuY29tIn0.eZJFnI_DKTzwoq0nVTgqivE0MJNJcb3qrympLfdXuI8", + "date": "2024-01-15" + }, + "taboomale": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MTU0MDkyLCJicmFuZCI6InRhYm9vbWFsZSIsImhvc3RuYW1lIjoid3d3LnRhYm9vbWFsZS5jb20ifQ.zozlvTjJ3TN8crwyhgEoNFm_Jp8sgwO5lfSQTzps9y0", + "date": "2024-01-15" + }, + "teenslovehugecocks": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY4MjIxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnRlZW5zbG92ZWh1Z2Vjb2Nrcy5jb20ifQ.mHD0bzxc_5lHHRFCKDxsvFZXvT5gfB3oOCz9sjCHHAg", + "date": "2024-01-15" + }, + "thegayoffice": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE2MTIxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3LnRoZWdheW9mZmljZS5jb20ifQ.463FpF2LPmsrMvZ0C4zP8wDZr126dOQbORVRDQ6XJYo", + "date": "2024-01-15" + }, + "toptobottom": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE2MDUxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3LnRvcHRvYm90dG9tLmNvbSJ9.eN0MvzpaefOPF0mJTxnaW3xG9x4LFgD1ekaQI7FQ2bI", + "date": "2024-01-15" + }, + "trannysurprise": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU5MTQxLCJicmFuZCI6InRyYW5zaGFyZGVyIiwiaG9zdG5hbWUiOiJ3d3cudHJhbnNoYXJkZXIuY29tIn0.K1WL4n0onDKhlRWgM-zd6B2nkAFdSZ7FWv3F8ay-kFs", + "date": "2024-01-15" + }, + "transangels": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU1NDAxLCJicmFuZCI6InRyYW5zYW5nZWxzIiwiaG9zdG5hbWUiOiJ3d3cudHJhbnNhbmdlbHMuY29tIn0.8QIwJbFZ3RkvWHCCPKeCsz8hK2vq5-2pnlvi-l3D81k", + "date": "2024-01-15" + }, + "transangelsnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjQ2MjIxLCJicmFuZCI6InRyYW5zYW5nZWxzIiwiaG9zdG5hbWUiOiJ3d3cudHJhbnNhbmdlbHNuZXR3b3JrLmNvbSJ9.uIpI7uewxVwVKCGOnHrHV-P_LewicrUJkoj9cYxYvIQ", + "date": "2024-01-15" + }, + "transharder": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU5MTQxLCJicmFuZCI6InRyYW5zaGFyZGVyIiwiaG9zdG5hbWUiOiJ3d3cudHJhbnNoYXJkZXIuY29tIn0.K1WL4n0onDKhlRWgM-zd6B2nkAFdSZ7FWv3F8ay-kFs", + "date": "2024-01-15" + }, + "transsensual": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU4NzYxLCJicmFuZCI6InRyYW5zc2Vuc3VhbCIsImhvc3RuYW1lIjoid3d3LnRyYW5zc2Vuc3VhbC5jb20ifQ.gHhYTLQ10Kxs96TsYtUHeWGMv3a-I8AKPVLfh89FsII", + "date": "2024-01-15" + }, + "trueamateurs": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY2MzMxLCJicmFuZCI6InNwaWNldmlkcyIsImhvc3RuYW1lIjoid3d3LnRydWVhbWF0ZXVycy5jb20ifQ.eosjGhhBK5PCljEtxGZewlNTEzIgnTexyiEYuvHela4", + "date": "2024-01-15" + }, + "tube8vip": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6NDE2ODEsImJyYW5kIjoidHViZTh2aXAiLCJob3N0bmFtZSI6Ind3dy50dWJlOHZpcC5jb20ifQ.YCmySJ2WYBFYON7nMtfX7DtRxNpHRR_cDF21CuSbvnQ", + "date": "2024-01-15" + }, + "twinkpop": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjcwNzMxLCJicmFuZCI6InR3aW5rcG9wIiwiaG9zdG5hbWUiOiJ3d3cudHdpbmtwb3AuY29tIn0.i3RaaiTlSclMEYowhmIOX6aW6EA23vhOI0kGPd1ywJM", + "date": "2024-01-15" + }, + "twistys": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjU4NjUxLCJicmFuZCI6InR3aXN0eXMiLCJob3N0bmFtZSI6Ind3dy50d2lzdHlzLmNvbSJ9.SosKDXnly59Mk5bNDlAxc2RNtb5xS_lj6S_hE4LsmSE", + "date": "2024-01-15" + }, + "twistysnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjYzMzExLCJicmFuZCI6InR3aXN0eXMiLCJob3N0bmFtZSI6Ind3dy50d2lzdHlzbmV0d29yay5jb20ifQ.4HKE9BbSJi7tv0ZYR8DLDyH8CXJ2Dhy5g_lm6Wc51nc", + "date": "2024-01-15" + }, + "virtualporn": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjcxNDcxLCJicmFuZCI6InZpcnR1YWxwb3JuIiwiaG9zdG5hbWUiOiJ2aXJ0dWFscG9ybi5jb20ifQ.aKHJp0GB8SEK6thuVL_sOD_co23kNGPBKY_H1tuNqDc", + "date": "2024-01-15" + }, + "voyr": { + "token": 
# User-tunable settings for the AyloAPI scraper.

# Minimum similarity ratio (0..1) between a local scene title and a title
# returned by the API before a search result is accepted as a match.
SET_RATIO = 0.75

# Verify SSL certificates on API requests; set to False only if you run
# into certificate errors.
CHECK_SSL_CERT = True

# User-Agent header sent with every request to the Aylo network.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0"
)
b/scrapers/AyloAPI/domains.py @@ -0,0 +1,67 @@ +import atexit +import datetime +import json +from pathlib import Path +from typing import Callable +from urllib.parse import urlparse + +""" +Keeps a cache of instance tokens for the Aylo API. + +Domains are assumed to omit the TLD, e.g. "brazzers" instead of "brazzers.com" +""" + + +__TOKENS_FILE = Path(__file__).parent / "aylo_tokens.json" +try: + __TOKENS = json.load(__TOKENS_FILE.open(encoding="utf-8")) +except (FileNotFoundError, json.JSONDecodeError): + __TOKENS = {} + + +@atexit.register +def __save_domains(): + sorted_domains = dict(sorted(__TOKENS.items(), key=lambda x: x[0])) + json.dump(sorted_domains, __TOKENS_FILE.open("w", encoding="utf-8"), indent=2) + + +def site_name(url: str) -> str: + """ + Returns the site name of the given URL, e.g. "brazzers" for "https://www.brazzers.com" + """ + return urlparse(url).netloc.split(".")[-2] + + +def get_token_for(domain: str, fallback: Callable[[str], str | None]) -> str | None: + """ + Returns a token for the given domain. If the stored token is not valid, the provided + fallback function will be used to generate a new token. + + If the fallback function returns None, it will return None. 
+ """ + today = datetime.datetime.today().strftime("%Y-%m-%d") + + # If the domain is in the list and if the token is still valid we just return it + if (entry := __TOKENS.get(domain)) and entry["date"] == today and entry["token"]: + return entry["token"] + + # Generate the token using the provided fallback function + url = f"https://www.{domain}.com" + token = fallback(url) + if not token: + return None + # And persist it + __TOKENS[domain] = { + "token": token, + "date": today, + } + + return token + + +def all_domains() -> list[str]: + """ + Returns a list of all known domains for the Aylo API + """ + + return list(__TOKENS.keys()) diff --git a/scrapers/AyloAPI/package b/scrapers/AyloAPI/package new file mode 100644 index 0000000..b148107 --- /dev/null +++ b/scrapers/AyloAPI/package @@ -0,0 +1,2 @@ +# script used as a dependency only +name: AyloAPI diff --git a/scrapers/AyloAPI/scrape.py b/scrapers/AyloAPI/scrape.py new file mode 100644 index 0000000..03afadd --- /dev/null +++ b/scrapers/AyloAPI/scrape.py @@ -0,0 +1,907 @@ +import json +import re +import sys +import difflib +from datetime import datetime +from typing import Any, Callable +from urllib.parse import urlparse + +try: + import requests +except ModuleNotFoundError: + print( + "You need to install the requests module." + "(https://docs.python-requests.org/en/latest/user/install/)\n" + "If you have pip (normally installed with python)," + "run this command in a terminal (cmd): python -m pip install requests", + file=sys.stderr, + ) + sys.exit() + + +try: + import py_common.log as log + from py_common.util import dig, scraper_args + from py_common.types import ( + ScrapedScene, + ScrapedMovie, + ScrapedPerformer, + ScrapedStudio, + ScrapedTag, + ) + import AyloAPI.domains as domains + import AyloAPI.config as config + from AyloAPI.slugger import slugify +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo! 
def default_postprocess(obj: Any, _) -> Any:
    """Identity postprocessor: returns the scraped object unchanged."""
    return obj


# network stuff
def __raw_request(url, headers) -> requests.Response:
    """
    Performs a GET request against the given URL, aborting the whole scrape
    if the API signals rate-limiting (HTTP 429).
    """
    log.trace(f"Sending GET request to {url}")
    response = requests.get(
        url, headers=headers, timeout=10, verify=config.CHECK_SSL_CERT
    )

    if response.status_code == 429:
        log.error(
            "[REQUEST] 429 Too Many Requests: "
            "you have sent too many requests in a given amount of time."
        )
        sys.exit(1)

    # Even a 404 will contain an instance token
    return response


def __api_request(url: str, headers: dict) -> dict | None:
    """
    Performs an API request and unwraps the "result" payload.

    Returns None (after logging) if the API responded with a list of errors.
    """
    api_response = __raw_request(url, headers).json()
    # Error responses come back as a list of error objects instead of a dict
    if isinstance(api_response, list):
        api_search_errors = "\n- ".join(
            json.dumps(res, indent=None) for res in api_response
        )
        log.error(f"Errors from API:\n{api_search_errors}")
        return None

    # NOTE: the original dumped every response to api_response.json in the
    # working directory -- leftover debug code, removed
    return api_response["result"]


def _create_headers_for(domain: str) -> dict[str, str]:
    """
    Builds the request headers for a domain, fetching a fresh instance token
    from the site itself when the cached one is stale. Exits if no token
    can be obtained.
    """

    # If we haven't stored a token we must provide a function to get one
    def get_instance_token(url: str) -> str | None:
        # Any page load on an Aylo site sets the instance_token cookie
        r = __raw_request(url, {"User-Agent": config.USER_AGENT})
        if r and (token := r.cookies.get("instance_token")):
            return token
        log.error(
            f"Failed to get instance_token from '{url}': "
            "are you sure this site is in the Aylo network?"
        )

    api_token = domains.get_token_for(domain, fallback=get_instance_token)
    if api_token is None:
        log.error(f"Unable to get an API token for '{domain}'")
        sys.exit(1)

    return {
        "Instance": api_token,
        "User-Agent": config.USER_AGENT,
        "Origin": f"https://{domain}",
        "Referer": f"https://{domain}",
    }


def _construct_url(api_result: dict) -> str:
    """
    Tries to construct a valid public URL for an API result

    This will often result in scene links that point to the parent network site,
    so we might want to add wrapper scrapers that can add the correct URL as well

    For example, a scene from We Live Together will have an URL for realitykings.com
    but that scene is also on welivetogether.com and that might be considered more canonical
    """
    brand = api_result["brand"]
    type_ = api_result["type"]
    id_ = api_result["id"]
    slug = slugify(api_result["title"])
    return f"https://www.{brand}.com/{type_}/{id_}/{slug}"


def _construct_performer_url(api_result: dict, site: str) -> str:
    """Builds the public model page URL for a performer on the given site."""
    id_ = api_result["id"]
    slug = slugify(api_result["name"])
    return f"https://www.{site}.com/model/{id_}/{slug}"


## Helper functions for the objects returned from Aylo's API
def get_studio(api_object: dict) -> ScrapedStudio | None:
    """
    Extracts the studio (and parent studio, when distinct) from an API object.

    Returns None (after logging) when neither a collection nor a brand name
    is present.
    """
    studio_name = dig(api_object, "collections", 0, "name")
    parent_name = dig(api_object, "brandMeta", ("displayName", "name", "shortName"))
    if studio_name:
        # Guard: brandMeta can be absent; the original crashed on None.lower()
        if parent_name and parent_name.lower() != studio_name.lower():
            return {
                "name": studio_name,
                "parent": {"name": parent_name},
            }
        return {"name": studio_name}
    elif parent_name:
        return {"name": parent_name}

    log.error(f"No studio for {api_object['type']} with id {api_object['id']}")
    return None


def get_tags(api_object: dict) -> list[ScrapedTag]:
    """Converts the API's tag list into Stash tags, skipping nameless entries."""
    tags = api_object.get("tags", [])
    return [{"name": x["name"].strip()} for x in tags if "name" in x]
# US state abbreviations (50 states + DC) and full state names; all map to "USA"
_US_STATES = (
    "AK AL AR AZ CA CO CT DC DE FL GA HI IA ID IL IN KS KY LA MA MD ME MI MN "
    "MO MS MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT WA "
    "WI WV WY".split()
    + [
        "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado",
        "Connecticut", "Delaware", "Florida", "Georgia", "Hawaii", "Idaho",
        "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana",
        "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
        "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada",
        "New Hampshire", "New Jersey", "New Mexico", "New York",
        "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon",
        "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
        "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington",
        "West Virginia", "Wisconsin", "Wyoming",
    ]
)
# NOTE(review): not referenced in this module's visible code -- presumably
# used by callers to normalize US birthplaces into a country; confirm
state_map = {state: "USA" for state in _US_STATES}


## Helper functions to convert from Aylo's API to Stash's scaper return types
def to_scraped_performer(
    performer_from_api: dict, site: str | None = None
) -> ScrapedPerformer:
    """
    Converts a performer object from the Aylo API into a ScrapedPerformer.

    Raises ValueError if the object's brand indicates it is not a performer.
    If `site` is given, a public model-page URL is constructed as well.
    """
    if (type_ := dig(performer_from_api, "brand")) and type_ not in (
        "actorsandtags",
        # Older sites use this type
        "phpactors",
    ):
        wrong_type = performer_from_api.get("type", "mystery")
        wrong_id = performer_from_api.get("id", "unknown")
        # Fixed copy-paste bug: message previously said "as a scene"
        log.error(
            f"Attempted to scrape a '{wrong_type}' (ID: {wrong_id}) as a performer."
        )
        raise ValueError("Invalid performer from API")
    # This is all we get when scraped as part of a scene or movie
    performer: ScrapedPerformer = {
        "name": performer_from_api["name"],
        "gender": performer_from_api["gender"],
    }

    if aliases := ", ".join(
        alias
        for alias in performer_from_api.get("aliases", [])
        if alias.lower() != performer["name"].lower()
    ):
        performer["aliases"] = aliases

    if details := performer_from_api.get("bio"):
        performer["details"] = details

    # All remaining fields are only available when scraped directly
    if height := performer_from_api.get("height"):
        # API reports inches; convert to cm
        performer["height"] = str(round(height * 2.54))

    if weight := performer_from_api.get("weight"):
        # API reports pounds; convert to kg
        performer["weight"] = str(round(weight / 2.205))

    if birthdate := performer_from_api.get("birthday"):
        performer["birthdate"] = datetime.strptime(
            birthdate, "%Y-%m-%dT%H:%M:%S%z"
        ).strftime("%Y-%m-%d")

    if birthplace := performer_from_api.get("birthPlace"):
        # NOTE(review): raw birthPlace (often "City, State") is stored as
        # country; state_map above looks intended for normalizing this -- confirm
        performer["country"] = birthplace

    if measurements := performer_from_api.get("measurements"):
        performer["measurements"] = measurements

    images = dig(performer_from_api, "images", "master_profile") or {}
    # Performers can have multiple images, try to get the biggest versions
    if images := [
        img
        for alt in images.values()
        if (img := dig(alt, ("xx", "xl", "lg", "md", "sm"), "url"))
    ]:
        performer["images"] = images

    if tags := get_tags(performer_from_api):
        performer["tags"] = tags

    if site:
        performer["url"] = _construct_performer_url(performer_from_api, site)

    return performer


def to_scraped_movie(movie_from_api: dict) -> ScrapedMovie:
    """
    Converts a movie object from the Aylo API into a ScrapedMovie.

    Raises ValueError if the object's type is not "movie".
    """
    if not movie_from_api["type"] == "movie":
        wrong_type = movie_from_api["type"]
        wrong_id = movie_from_api["id"]
        log.error(f"Attempted to scrape a '{wrong_type}' (ID: {wrong_id}) as a movie.")
        raise ValueError("Invalid movie from API")

    movie: ScrapedMovie = {
        "name": movie_from_api["title"],
        "synopsis": dig(movie_from_api, "description"),
        "front_image": dig(movie_from_api, "images", "cover", "0", "xx", "url"),
        "url": _construct_url(movie_from_api),
    }

    if date := dig(movie_from_api, "dateReleased"):
        movie["date"] = datetime.strptime(date, "%Y-%m-%dT%H:%M:%S%z").strftime(
            "%Y-%m-%d"
        )

    if studio := get_studio(movie_from_api):
        movie["studio"] = studio

    return movie


def to_scraped_scene(scene_from_api: dict) -> ScrapedScene:
    """
    Converts a scene object from the Aylo API into a ScrapedScene.

    Raises ValueError if the object's type is not "scene".
    """
    if not scene_from_api["type"] == "scene":
        wrong_type = scene_from_api["type"]
        wrong_id = scene_from_api["id"]
        log.error(f"Attempted to scrape a '{wrong_type}' (ID: {wrong_id}) as a scene.")
        raise ValueError("Invalid scene from API")

    scene: ScrapedScene = {
        "title": scene_from_api["title"],
        "code": str(scene_from_api["id"]),
        "details": dig(scene_from_api, "description"),
        "date": datetime.strptime(
            scene_from_api["dateReleased"], "%Y-%m-%dT%H:%M:%S%z"
        ).strftime("%Y-%m-%d"),
        "url": _construct_url(scene_from_api),
        "performers": [
            to_scraped_performer(p, dig(scene_from_api, "brand"))
            for p in scene_from_api["actors"]
        ],
        "tags": get_tags(scene_from_api),
    }

    # Take the largest available poster variant
    if image := dig(
        scene_from_api,
        "images",
        ("poster", "poster_fallback"),
        "0",
        ("xx", "xl", "lg", "md", "sm", "xs"),
        "url",
    ):
        scene["image"] = image

    if dig(scene_from_api, "parent", "type") == "movie":
        scene["movies"] = [to_scraped_movie(scene_from_api["parent"])]

    if studio := get_studio(scene_from_api):
        scene["studio"] = studio

    if markers := scene_from_api.get("timeTags"):
        log.debug(
            f"This scene has {len(markers)} markers"
            " but scraping markers hasn't been implemented yet"
        )

    return scene
## Primary functions used to scrape from Aylo's API
def scene_from_url(
    url, postprocess: Callable[[ScrapedScene, dict], ScrapedScene] = default_postprocess
) -> ScrapedScene | None:
    """
    Scrapes a scene from a URL, running an optional postprocess function on the result
    """
    if not (match := re.search(r"/(\d+)/", url)):
        log.error(
            "Can't get the ID of the Scene. "
            "Are you sure that URL is from a site in the Aylo Network?"
        )
        return None
    scene_id = match.group(1)

    log.debug(f"Scene ID: {scene_id}")

    # Extract the domain from the URL
    domain = domains.site_name(url)

    api_URL = f"https://site-api.project1service.com/v2/releases/{scene_id}"
    api_headers = _create_headers_for(domain)
    api_scene_json = __api_request(api_URL, api_headers)

    if not api_scene_json:
        return None

    if dig(api_scene_json, "type") == "scene":
        return postprocess(to_scraped_scene(api_scene_json), api_scene_json)

    # If you scrape a trailer we can still get the correct scene data
    if dig(api_scene_json, "parent", "type") == "scene":
        log.debug("Result is a movie or trailer, getting scene data from parent")
        return postprocess(
            to_scraped_scene(api_scene_json["parent"]), api_scene_json["parent"]
        )


def performer_from_url(
    url,
    postprocess: Callable[
        [ScrapedPerformer, dict], ScrapedPerformer
    ] = default_postprocess,
) -> ScrapedPerformer | None:
    """
    Scrapes a performer from a URL, running an optional postprocess function on the result
    """
    if not (match := re.search(r"/(\d+)/", url)):
        log.error(
            "Can't get the ID of the performer. "
            "Are you sure that URL is from a site in the Aylo Network?"
        )
        return None
    performer_id = match.group(1)

    log.debug(f"Performer ID: {performer_id}")

    # Extract the domain from the URL (consistent with scene_from_url:
    # previously an inline urlparse(...).netloc.split(".")[-2])
    domain = domains.site_name(url)

    api_URL = f"https://site-api.project1service.com/v1/actors/{performer_id}"
    api_headers = _create_headers_for(domain)
    api_performer_json = __api_request(api_URL, api_headers)
    if not api_performer_json:
        return None

    return postprocess(to_scraped_performer(api_performer_json), api_performer_json)


def movie_from_url(
    url, postprocess: Callable[[ScrapedMovie, dict], ScrapedMovie] = default_postprocess
) -> ScrapedMovie | None:
    """
    Scrapes a movie from a URL, running an optional postprocess function on the result
    """
    if not (match := re.search(r"/(\d+)/", url)):
        log.error(
            "Can't get the ID of the movie. "
            "Are you sure that URL is from a site in the Aylo Network?"
        )
        return None
    movie_id = match.group(1)

    log.debug(f"Movie ID: {movie_id}")

    # Extract the domain from the URL (consistent with scene_from_url)
    domain = domains.site_name(url)

    api_URL = f"https://site-api.project1service.com/v2/releases/{movie_id}"
    api_headers = _create_headers_for(domain)
    api_movie_json = __api_request(api_URL, api_headers)
    if not api_movie_json:
        return None

    if dig(api_movie_json, "type") == "movie":
        return postprocess(to_scraped_movie(api_movie_json), api_movie_json)

    # If you scrape a scene or trailer, we can still get the correct movie data
    if dig(api_movie_json, "parent", "type") == "movie":
        log.debug("Result is a scene or trailer, getting movie data from parent")
        return postprocess(
            to_scraped_movie(api_movie_json["parent"]), api_movie_json["parent"]
        )
# Since the "Scrape with..." function in Stash expects a single result, we provide
# this function to return the first result that exceeds the threshold so
# that users don't need to use scene_search directly and THEN take the first result
def find_scene(
    query: str,
    search_domains: list[str] | None = None,
    min_ratio: float = 0.9,
    postprocess: Callable[[ScrapedScene, dict], ScrapedScene] = default_postprocess,
) -> ScrapedScene | None:
    """
    Searches the Aylo API for scenes matching the given query and returns the
    first match that exceeds `min_ratio` similarity: a float between 0 and 1.

    Differs from `scene_search` in that it only returns the first match,
    returning early as soon as it finds a match that exceeds the threshold.

    If search_domains is provided it will only search those domains,
    otherwise it will search all (this could be very slow!)

    Domains should not include the "www." or ".com" parts of the domain: 'brazzers', 'realitykings', etc.

    If postprocess is provided it will be called on the result before returning
    """
    if not query:
        log.error("No query provided")
        return None

    if not search_domains:
        log.warning("Searching all known domains, this could be very slow!")
        search_domains = domains.all_domains()

    log.debug(f"Matching '{query}' against {len(search_domains)} sites")

    def matcher(candidate_title: str):
        # Similarity of the query to a candidate title, rounded to 3 decimals
        return round(
            difflib.SequenceMatcher(
                None, query.lower(), candidate_title.lower()
            ).ratio(),
            3,
        )

    for domain in search_domains:
        log.debug(f"Searching '{domain}'")

        api_headers = _create_headers_for(domain)
        search_url = f"https://site-api.project1service.com/v2/releases?search={query}&type=scene"
        api_response = __api_request(search_url, api_headers)

        if api_response is None:
            log.error(f"Failed to search '{domain}'")
            continue
        if not api_response:
            log.debug(f"No results from '{domain}'")
            continue

        # Score every candidate once instead of re-running the matcher on the winner
        ratio, best_match = max(
            ((matcher(x["title"]), x) for x in api_response),
            key=lambda scored: scored[0],
        )
        if ratio >= min_ratio:
            log.info(
                f"Found scene '{best_match['title']}' with {ratio:.2%} similarity "
                f"to '{query}' (exceeds {min_ratio:.2%} threshold) "
                f"on '{domain}'"
            )
            return postprocess(to_scraped_scene(best_match), best_match)
        else:
            log.info(
                f"Giving up on '{domain}': best result '{best_match['title']}' "
                f"with {ratio:.2%} similarity"
            )

    log.error(f"No scenes found for '{query}'")
    return None
# Since the "Scrape with..." function in Stash expects a single result, we provide
# this function to return the first result that exceeds the threshold so
# that users don't need to use performer_search directly and THEN take the first result
def find_performer(
    query: str,
    search_domains: list[str] | None = None,
    min_ratio: float = 0.9,
    postprocess: Callable[
        [ScrapedPerformer, dict], ScrapedPerformer
    ] = default_postprocess,
) -> ScrapedPerformer | None:
    """
    Searches the Aylo API for performers matching the given query and returns the
    first match that exceeds `min_ratio` similarity: a float between 0 and 1.

    Differs from `performer_search` in that it only returns the first match,
    returning early as soon as it finds a match that exceeds the threshold.

    If search_domains is provided it will only search those domains,
    otherwise it will search all (this could be very slow!)

    Domains should not include the "www." or ".com" parts of the domain: 'brazzers', 'realitykings', etc.

    If postprocess is provided it will be called on the result before returning
    """
    if not query:
        log.error("No query provided")
        return None

    if not search_domains:
        log.warning("Searching all known domains, this could be very slow!")
        search_domains = domains.all_domains()

    log.debug(f"Matching '{query}' against {len(search_domains)} sites")

    def matcher(candidate_name: str):
        # Similarity of the query to a candidate name, rounded to 3 decimals
        return round(
            difflib.SequenceMatcher(
                None, query.lower(), candidate_name.lower()
            ).ratio(),
            3,
        )

    for domain in search_domains:
        log.debug(f"Searching {domain}")

        api_headers = _create_headers_for(domain)
        search_url = f"https://site-api.project1service.com/v1/actors?search={query}"
        api_response = __api_request(search_url, api_headers)

        if api_response is None:
            log.error(f"Failed to search {domain}")
            continue
        if not api_response:
            log.debug(f"No results from {domain}")
            continue

        # Score every candidate once instead of re-running the matcher on the winner
        ratio, best_match = max(
            ((matcher(x["name"]), x) for x in api_response),
            key=lambda scored: scored[0],
        )
        if ratio >= min_ratio:
            log.info(
                f"Found performer '{best_match['name']}' with {ratio:.2%} similarity "
                f"to '{query}' (exceeds {min_ratio:.2%} threshold) "
                f"on '{domain}'"
            )
            return postprocess(to_scraped_performer(best_match, domain), best_match)
        else:
            log.info(
                f"Giving up on '{domain}': best result '{best_match['name']}' "
                f"with {ratio:.2%} similarity"
            )

    log.error(f"No performers found for '{query}'")
    return None
def scene_search(
    query: str,
    search_domains: list[str] | None = None,
    postprocess: Callable[[ScrapedScene, dict], ScrapedScene] = default_postprocess,
) -> list[ScrapedScene]:
    """
    Searches the Aylo API for the given query and returns a list of ScrapedScene

    If search_domains is provided it will only search those domains,
    otherwise it will search all known domains (this could be very slow!)

    Domains should not include the "www." or ".com" parts of the domain: 'brazzers', 'realitykings', etc.

    If postprocess is provided it will be called on each result before returning
    """
    if not query:
        log.error("No query provided")
        return []

    if not search_domains:
        log.warning("Searching all known domains, this could be very slow!")
        search_domains = domains.all_domains()

    log.debug(f"Searching for '{query}' on {len(search_domains)} sites")

    # The source of the results will be based on the token used (Brazzers, Reality Kings, etc.)
    search_url = f"https://site-api.project1service.com/v2/releases?search={query}&type=scene&limit=10"
    search_results = []
    already_seen = set()

    def matcher(candidate: ScrapedScene):
        # Similarity of the query to a scraped scene's title, rounded to 3 decimals
        return round(
            difflib.SequenceMatcher(
                None,
                query.lower(),
                candidate["title"].lower(),  # type: ignore (title is always set)
            ).ratio(),
            3,
        )

    for domain in search_domains:
        log.debug(f"Searching {domain}")

        api_headers = _create_headers_for(domain)
        api_response = __api_request(search_url, api_headers)
        if api_response is None:
            log.error(f"Failed to search {domain}")
            continue
        if not api_response:
            log.debug(f"No results from {domain}")
            continue

        # NOTE(review): this pre-filter compares raw API "id" values against
        # already_seen, but already_seen is populated with scraped "code" values
        # below -- confirm both hold the same identifier, otherwise this filter
        # is a no-op and only the c.get("code") check deduplicates
        candidates = [
            postprocess(to_scraped_scene(result), result)
            for result in api_response
            if result["id"] not in already_seen
        ]
        search_results.extend(
            c
            for c in candidates
            if matcher(c) > 0.5 and c.get("code") not in already_seen
        )
        already_seen.update(c.get("code") for c in candidates)

        # Try to avoid more than 10ish results or this will take forever
        if len(search_results) >= 10:
            log.warning("Found more than 10 results, stopping search")
            break

    log.info(f"Search finished, found {len(search_results)} candidates")

    return sorted(search_results, key=matcher, reverse=True)
def performer_search(
    query: str,
    search_domains: list[str] | None = None,
    postprocess: Callable[
        [ScrapedPerformer, dict], ScrapedPerformer
    ] = default_postprocess,
) -> list[ScrapedPerformer]:
    """
    Searches the Aylo API for the given query and returns a list of ScrapedPerformer

    If search_domains is provided it will only search those domains,
    otherwise it will search all known domains (this could be very slow!)

    Domains should not include the "www." or ".com" parts of the domain: 'brazzers', 'realitykings', etc.

    If postprocess is provided it will be called on each result before returning
    """
    if not query:
        log.error("No query provided")
        return []

    if not search_domains:
        log.warning("Searching all known domains, this could be very slow!")
        search_domains = domains.all_domains()

    log.debug(f"Searching for '{query}' on {len(search_domains)} sites")

    # The source of the results will be based on the token used (Brazzers, Reality Kings, etc.)
    search_url = (
        f"https://site-api.project1service.com/v1/actors?search={query}&limit=10"
    )
    search_results = []
    already_seen = set()

    def matcher(candidate: ScrapedPerformer):
        # Similarity of the query to a scraped performer's name, rounded to 3 decimals
        return round(
            difflib.SequenceMatcher(
                None,
                query.lower(),
                candidate["name"].lower(),  # type: ignore (name is always set)
            ).ratio(),
            3,
        )

    for domain in search_domains:
        log.debug(f"Searching {domain}")

        api_headers = _create_headers_for(domain)
        api_response = __api_request(search_url, api_headers)
        if api_response is None:
            log.error(f"Failed to search {domain}")
            continue
        if not api_response:
            log.debug(f"No results from {domain}")
            continue

        candidates = [
            postprocess(to_scraped_performer(result, domain), result)
            for result in api_response
        ]

        search_results.extend(
            c
            for c in candidates
            if matcher(c) > 0.5 and c.get("name") not in already_seen
        )
        already_seen.update(c.get("name") for c in candidates)

        # Try to avoid more than 10ish results or this will take forever
        if len(search_results) >= 10:
            log.warning("Found more than 10 results, stopping search")
            break

    # info level to match scene_search's equivalent summary message
    log.info(f"Search finished, found {len(search_results)} candidates")

    return sorted(search_results, key=matcher, reverse=True)
reverse=True) + + +def scene_from_fragment( + fragment: dict, + search_domains: list[str] | None = None, + min_ratio=config.SET_RATIO, + postprocess: Callable[[ScrapedScene, dict], ScrapedScene] = default_postprocess, +) -> ScrapedScene | None: + """ + Scrapes a scene from a fragment, which must contain at least one of the following: + - url: the URL of the scene + - title: the title of the scene + + If domains is provided it will only search those domains, + otherwise it will search all known domains (this could be very slow!) + + If min_ratio is provided _AND_ the fragment contains a title but no URL, + the search will only return a scene if a match with at least that ratio is found + + If postprocess is provided it will be called on the result before returning + """ + log.debug(f"Fragment scraping scene {fragment['id']}") + if url := fragment.get("url"): + log.debug(f"Using scene URL: '{url}'") + if scene := scene_from_url(url, postprocess=postprocess): + return scene + log.debug("Failed to scrape scene from URL") + if title := fragment.get("title"): + log.debug(f"Searching for '{title}'") + if scene := find_scene( + title, search_domains, min_ratio, postprocess=postprocess + ): + return scene + log.debug("Failed to find scene by title") + + log.warning("Cannot scrape from this fragment: need to have title or url set") + + +def performer_from_fragment( + fragment: dict, + search_domains: list[str] | None = None, + min_ratio=0.9, + postprocess: Callable[ + [ScrapedPerformer, dict], ScrapedPerformer + ] = default_postprocess, +) -> ScrapedPerformer | None: + """ + Scrapes a performer from a fragment, which must contain one of the following: + - url: the URL of the performer page (anywhere in the Aylo network) + - name: the name of the performer + + If domains is provided it will only search those domains, + otherwise it will search all known domains (this could be very slow!) 
+ + If min_ratio is provided _AND_ the fragment contains a title but no URL, + the search will only return a scene if a match with at least that ratio is found + + If postprocess is provided it will be called on the result before returning + """ + log.debug("Fragment scraping performer...") + if url := fragment.get("url"): + log.debug(f"Using performer URL: '{url}'") + return performer_from_url(url, postprocess=postprocess) + elif name := fragment.get("name"): + log.debug(f"Searching for '{name}'") + return find_performer(name, search_domains, min_ratio, postprocess=postprocess) + + log.warning("Cannot scrape from this fragment: need to have url or name set") + + +def main_scraper(): + """ + Takes arguments from stdin or from the command line and dumps output as JSON to stdout + """ + op, args = scraper_args() + result = None + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url) + case "scene-by-name", {"name": name, "extra": _domains} if name: + result = scene_search(name, search_domains=_domains) + case "scene-by-fragment" | "scene-by-query-fragment", args: + _domains = args.get("extra", None) + result = scene_from_fragment(args, search_domains=_domains) + case "performer-by-url", {"url": url}: + result = performer_from_url(url) + case "performer-by-fragment", args: + _domains = args.get("extra", None) + result = performer_from_fragment(args, search_domains=_domains) + case "performer-by-name", {"name": name, "extra": _domains} if name: + result = performer_search(name, search_domains=_domains) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) + + +if __name__ == "__main__": + main_scraper() diff --git a/scrapers/AyloAPI/slugger.py b/scrapers/AyloAPI/slugger.py new file mode 100644 index 0000000..967282f --- /dev/null +++ b/scrapers/AyloAPI/slugger.py @@ -0,0 +1,82 @@ +import re + +""" 
"""
This ports the kebabCase function from lodash to Python. It is used to generate
slugs for the URLs for scenes, performers and movies scraped from the Aylo API.

https://github.com/lodash/lodash/blob/main/src/kebabCase.ts
"""

# Character-class building blocks, transcribed from lodash's unicode ranges.
# These are regex source fragments (note the doubled backslashes), not
# literal characters.
rsAstralRange = "\\ud800-\\udfff"
rsComboMarksRange = "\\u0300-\\u036f"
reComboHalfMarksRange = "\\ufe20-\\ufe2f"
rsComboSymbolsRange = "\\u20d0-\\u20ff"
rsComboMarksExtendedRange = "\\u1ab0-\\u1aff"
rsComboMarksSupplementRange = "\\u1dc0-\\u1dff"
rsComboRange = (
    rsComboMarksRange
    + reComboHalfMarksRange
    + rsComboSymbolsRange
    + rsComboMarksExtendedRange
    + rsComboMarksSupplementRange
)
rsDingbatRange = "\\u2700-\\u27bf"
rsLowerRange = "a-z\\xdf-\\xf6\\xf8-\\xff"
rsMathOpRange = "\\xac\\xb1\\xd7\\xf7"
rsNonCharRange = "\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf"
rsPunctuationRange = "\\u2000-\\u206f"
rsSpaceRange = " \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000"
rsUpperRange = "A-Z\\xc0-\\xd6\\xd8-\\xde"
rsVarRange = "\\ufe0e\\ufe0f"
rsBreakRange = rsMathOpRange + rsNonCharRange + rsPunctuationRange + rsSpaceRange

# Composed sub-patterns (apostrophes, word breaks, emoji components, ...)
rsApos = "['\u2019]"
rsBreak = f"[{rsBreakRange}]"
rsCombo = f"[{rsComboRange}]"
rsDigit = "\\d"
rsDingbat = f"[{rsDingbatRange}]"
rsLower = f"[{rsLowerRange}]"
rsMisc = f"[^{rsAstralRange}{rsBreakRange + rsDigit + rsDingbatRange + rsLowerRange + rsUpperRange}]"
rsFitz = "\\ud83c[\\udffb-\\udfff]"
rsModifier = f"(?:{rsCombo}|{rsFitz})"
rsNonAstral = f"[^{rsAstralRange}]"
rsRegional = "(?:\\ud83c[\\udde6-\\uddff]){2}"
rsSurrPair = "[\\ud800-\\udbff][\\udc00-\\udfff]"
rsUpper = f"[{rsUpperRange}]"
rsZWJ = "\\u200d"

rsMiscLower = f"(?:{rsLower}|{rsMisc})"
rsMiscUpper = f"(?:{rsUpper}|{rsMisc})"
rsOptContrLower = f"(?:{rsApos}(?:d|ll|m|re|s|t|ve))?"
rsOptContrUpper = f"(?:{rsApos}(?:D|LL|M|RE|S|T|VE))?"
reOptMod = f"{rsModifier}?"
rsOptVar = f"[{rsVarRange}]?"
rsOptJoin = f"(?:{rsZWJ}(?:{('|').join([rsNonAstral, rsRegional, rsSurrPair])}){rsOptVar + reOptMod})*"
rsOrdLower = "\\d*(?:1st|2nd|3rd|(?![123])\\dth)(?=\\b|[A-Z_])"
rsOrdUpper = "\\d*(?:1ST|2ND|3RD|(?![123])\\dTH)(?=\\b|[a-z_])"
rsSeq = rsOptVar + reOptMod + rsOptJoin
rsEmoji = rf"(?:{('|').join([rsDingbat, rsRegional, rsSurrPair])}){rsSeq}"

# Splits a string into its constituent "words": camelCase humps, digit runs,
# ordinals, contractions, emoji -- mirrors lodash's `unicodeWords`
reUnicodeWords = re.compile(
    "|".join(
        [
            f"{rsUpper}?{rsLower}+{rsOptContrLower}(?={('|').join([rsBreak, rsUpper, '$'])})",
            f"{rsMiscUpper}+{rsOptContrUpper}(?={('|').join([rsBreak, rsUpper + rsMiscLower, '$'])})",
            f"{rsUpper}?{rsMiscLower}+{rsOptContrLower}",
            f"{rsUpper}+{rsOptContrUpper}",
            rsOrdUpper,
            rsOrdLower,
            f"{rsDigit}+",
            rsEmoji,
        ]
    )
)

# ASCII fallback: runs of characters that are not ASCII punctuation/control
reAsciiWords = re.compile(r"[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+")


def slugify(string):
    """
    Return `string` as a lowercase, hyphen-separated slug (lodash `kebabCase`).

    Apostrophes (ASCII ' and U+2019) are removed outright so "Mom's" becomes
    "moms". The unicode word splitter is preferred; the ASCII splitter is the
    fallback when it finds no words at all.
    """
    cleaned = re.sub("['\u2019]", "", string)
    words = (
        reUnicodeWords.findall(cleaned)
        if reUnicodeWords.search(cleaned)
        else reAsciiWords.findall(cleaned)
    )
    return "-".join(words).lower()
Invasion, Sex Busters, XXX Pawn +- All scene URLs use 'video' instead of 'scene' between domain and scene ID but performer URLs are fine +- Should translate old-style URLs to new-style URLs by checking for redirects: + - https://bangbros.com/video116453/eva-lovia + - https://bangbros.com/video/9027421/eva-lovia +- Name adjustments: + AvaSpice -> Ava Spice + MomIsHorny -> Mom Is Horny + +https://bangbros.com/video/9719651/big-poolside-booty - Ass Parade / BangBros +https://bangbros.com/video/9087751/cock-a-smile - AvaSpice / BangBros +https://bangbros.com/video/8968901/sexy-latinas-good-dicking - Back Room Facials / BangBros +https://bangbros.com/video/9092331/a-taste-of-vanilla - Backroom MILF / BangBros +https://bangbros.com/video/9115601/erika-vution-rocked-my-cock - Ball Honeys / BangBros +https://bangbros.com/video/9420961/zodiac-fuck - Bang Bus / BangBros +https://bangbros.com/video/8985221/sexy-selena - Bang Casting / BangBros +https://bangbros.com/video/9214171/fuck-me-in-the-ass-please - Bang POV / BangBros +https://bangbros.com/video/9063991/sex-tape-5 - Bang Tryouts / BangBros +https://bangbros.com/video/9580671/stepdad-gets-caught-in-action - BangBros 18 / BangBros +https://bangbros.com/video/9027421/eva-lovia - BangBros Angels / BangBros +https://bangbros.com/video/9428961/new-shower-new-pussy - Bangbros Clips / BangBros +https://bangbros.com/video/9010721/big-ass-big-tits-gianna-works-out-that-juicy-pussy - BangBros Remastered / BangBros +https://bangbros.com/video/8863501/summer-getting-nasty - BangBros Vault / BangBros +https://bangbros.com/video/9155231/asian-nympho-loves-to-get-fuck - Big Mouthfuls / BangBros +https://bangbros.com/video/9297791/nikka-realy-needs-the-cream- - Big Tit Cream Pie / BangBros +https://bangbros.com/video/9429171/puerto-rican-pussy-loves-oil - Big Tits, Round Asses / BangBros +https://bangbros.com/video/8906201/arietta-sucks-her-stepbrothers-dick - BlowJob Fridays / BangBros 
+https://bangbros.com/video/9120031/jenaveve-gets-down-and-dirtty - Blowjob Ninjas / BangBros +https://bangbros.com/video/9149391/black-gold - Boob Squad / BangBros +https://bangbros.com/video/9497411/stretching-out-august - Brown Bunnies / BangBros +https://bangbros.com/video/9032981/rachel-starr-and-the-hoagie-hero - Can He Score / BangBros +https://bangbros.com/video/9124101/she-does-cook-anal - Casting / BangBros +https://bangbros.com/video/9041131/doesnt-get-better-then-jynx-maze - Chongas / BangBros +https://bangbros.com/video/8968571/colombian-newbie-gets-slammed - Colombia Fuck Fest / BangBros +https://bangbros.com/video/9097911/pin-up-doll - Dirty World Tour / BangBros +https://bangbros.com/video/9034361/pornstars-raid-the-dorm-full-of-college-boys - Dorm Invasion / BangBros +https://bangbros.com/video/8992381/slutty-amateur-gets-a-facial - Facial Fest / BangBros +https://bangbros.com/video/9027201/fuckteam-beach-time - Fuck Team Five / BangBros +https://bangbros.com/video/8971801/thick-latina-deep-throats-in-the-gloryhole - Glory Hole Loads / BangBros +https://bangbros.com/video/9032011/puerto-rican-flavor-weve-evans - Latina Rampage / BangBros +https://bangbros.com/video/9083961/one-with-nature - Living With Anna / BangBros +https://bangbros.com/video/9129461/anita-blues-anal-salute - MILF Lessons / BangBros +https://bangbros.com/video/9044481/milf-takes-a-pounding - Milf Soup / BangBros +https://bangbros.com/video/9059841/feet-that-will-make-you-squirt - Magical Feet / BangBros +https://bangbros.com/video/9401771/step-mom-gives-blu-balls - MomIsHorny / BangBros +https://bangbros.com/video/8852571/tiny-pussy-gets-fucked - Monsters of Cock / BangBros +https://bangbros.com/video/9139961/alexis-malone - Mr CamelToe / BangBros +https://bangbros.com/video/8986901/cristi-ann-gets-an-anal-pounding - Mr. 
Anal / BangBros +https://bangbros.com/video/8838251/anal-gymnastics - My Dirty Maid / BangBros +https://bangbros.com/video/9088421/suzanny-the-dog-rescuer - My Life In Brazil / BangBros +https://bangbros.com/video/9100111/pussy-on-wheels - Newbie Black / BangBros +https://bangbros.com/video/8996361/keisha-grey-fucked-by-black-cock - Pawg / BangBros +https://bangbros.com/video/9035831/amateur-lesbians-fuck - Party of 3 / BangBros +https://bangbros.com/video/9119001/olivia-olovely-and-from-one-million-to-billion - Penny Show / BangBros +https://bangbros.com/video/9041761/sexy-brunette-massaged-and-fucked-hardcore - Porn Star Spa / BangBros +https://bangbros.com/video/9097431/freaky-slumber-munch - Power Munch / BangBros +https://bangbros.com/video/8935091/selva-gets-public-anal - Public Bang / BangBros +https://bangbros.com/video/9026281/amateur-blonde-slut-gang-banged - Slutty White Girls / BangBros +https://bangbros.com/video/9007731/my-stepmom-teaches-me-how-to-fuck-my-bf - Stepmom Videos / BangBros +https://bangbros.com/video/9133631/tour-d-fuck - Street Ranger / BangBros +https://bangbros.com/video/9015181/hot-latina-pornstar-jerks-off-a-dick - Tugjobs / BangBros +https://virtualporn.com/video/9359281/too-horny-to-fail - Virtual Porn / BangBros +https://bangbros.com/video/9123731/latin-fever - Working Latinas / BangBros + +# Brazzers + +- 49 studios listed on StashDB, 28 scrapable from API + Missing: Brazzers en Español, Butts & Blacks, Charles Dera, Desiree Dulce, Emily Willis, Euro Babes, Jizz On My Jugs, Jordi, Kendra Lust, Lacy Lennon, Lil D, Luna Star, Pornstars Punishment, Rachel Starr, Racks & Blacks, SexPro Adventures, Sofia Rose, Sophie Dee, Teens Like It Black, VRT, Xander Corvus +- All scene URLs use 'video' instead of 'scene' between domain and scene ID +- All performer URLs use 'pornstar' instead of 'model' between domain and scene ID +- If scene has tag "Brazzers Live" then studio should be "Brazzers Live" +- Name adjustments: + JugFuckers -> Jug 
Fuckers + Shes Gonna Squirt -> She's Gonna Squirt + +https://www.brazzers.com/video/3788751/from-russia-with-lust - Asses in Public / Brazzers +https://www.brazzers.com/video/4423671/chores-suck-and-so-do-i - Baby Got Boobs / Brazzers +https://www.brazzers.com/video/4445271/big-ass-anal-for-a-heavy-load - Big Butts Like It Big / Brazzers +https://www.brazzers.com/video/3831901/principal-photography - Big Tits at School / Brazzers +https://www.brazzers.com/video/4658471/virtual-fuckfest - Big Tits at Work / Brazzers +https://www.brazzers.com/video/3847871/kortney-s-slutty-circuit-training - Big Tits In Sports / Brazzers +https://www.brazzers.com/video/3851201/dinners-on-me - Big Tits In Uniform / Brazzers +https://www.brazzers.com/video/4404989/britney-s-jeans - Big Wet Butts / Brazzers +https://www.brazzers.com/video/9311401/medical-ass-istance-required - BrazzersExxtra / Brazzers +https://www.brazzers.com/video/3873921/i-gotta-have-my-moms-boyfriend - Brazzers Vault / Brazzers +https://www.brazzers.com/video/3793201/big-natural-round-boobs - Busty & Real / Brazzers +https://www.brazzers.com/video/3882961/pretty-face-with-big-tits - Bustyz / Brazzers +https://www.brazzers.com/video/3899601/you-want-us-to-clean-your-dick - CFNM / Brazzers +https://www.brazzers.com/video/4415556/siouxsie-q-s-anal-kitchen-cleaning - Day With A Pornstar / Brazzers +https://www.brazzers.com/video/4393739/im-sensitive - Dirty Masseur / Brazzers +https://www.brazzers.com/video/3912371/psycho-anal-ysis - Doctor Adventures / Brazzers +https://www.brazzers.com/video/4505171/anal-workout-with-bestie - Hot And Mean / Brazzers +https://www.brazzers.com/video/3924151/everything-is-bigger-in-texas - Hot Chicks Big Asses / Brazzers +https://www.brazzers.com/video/3936411/big-natural-juggs - JugFuckers / Brazzers +https://www.brazzers.com/video/4395708/i-m-over-it - Milfs Like It Big / Brazzers +https://www.brazzers.com/video/3940591/ninas-chapel-of-lust-part-1 - Mommy Got Boobs / Brazzers 
+https://www.brazzers.com/video/3963591/you-need-mums-approval - Moms in control / Brazzers +https://www.brazzers.com/video/4445561/the-garden-of-demi-s-delights - Pornstars Like it Big / Brazzers +https://www.brazzers.com/video/4410374/sex-with-the-therapist - Real Wife Stories / Brazzers +https://www.brazzers.com/video/4015911/wheres-my-valentine - Shes Gonna Squirt / Brazzers +https://www.brazzers.com/video/4327711/mc2-ass - Teens Like It Big / Brazzers +https://www.brazzers.com/video/3789801/the-exxxceptions-episode-1 - ZZ Series / Brazzers + +# Bromo + +- Single studio but has some substudio elements? Will flatten to just "Bromo" with no parent + +https://www.bromo.com/scene/4412747/tattoo-fuck - Bromo US / Bromo +https://www.bromo.com/scene/3227341/breaking-him-scene-1 - Bromo BlackMaleMe / Bromo + +# CzechHunter (formerly BigStr) + +- Now known as Czech Hunter, seems to be actively restructuring: during development of this scraper several scenes vanished from public site +- 3 studios listed on StashDB, all 3 scrapable from API +- All studios have their own domain + +https://www.debtdandy.com/scene/4300191/debt-dandy-160 - Debt Dandy / BigStr +https://www.dirtyscout.com/scene/4508331/dirty-scout-283 - Dirty Scout / BigStr +https://www.czechhunter.com/scene/9575711/czech-hunter-718 - Czech Hunter / BigStr + +# Deviante + +- 5 studios listed on StashDB, all scrapable from API +- All scenes use 'video' instead of 'scene' between domain and scene ID +- All studios have their own domain +- Name adjustments: + es -> Erotic Spice + fmf -> Forgive Me Father + lha -> Love Her Ass + pdt -> Pretty Dirty Teens + sw -> Sex Working + +https://www.eroticspice.com/video/4422220/busty-japanese-milf-fucks-shy-guy - es / Deviante +https://www.forgivemefather.com/video/4493281/father-s-righteous-ritual - fmf / Deviante +https://www.loveherass.com/video/4424461/playful-roomie-loves-anal - lha / Deviante 
+https://www.prettydirtyteens.com/video/7939981/creampie-for-sneaky-college-teen - pdt / Deviante +https://www.deviante.com/video/4647241/cash-for-happy-ending-with-masseuse - sw / Deviante +https://www.sexworking.com/video/4474711/brazilian-escort-summoned-to-please - sw / Deviante + +# Digital Playground + +- 6 studios listed on StashDB, all scrapable from API +- Name adjustments: + dpw -> DP World + Dpstar Episodes -> Episodes + Dpstar Sex Challenges -> Sex Challenges + +https://www.digitalplayground.com/scene/4410802/sleepless-nights-scene-1 - Digital Playground +https://www.digitalplayground.com/scene/8353721/tourist-trap-episode-4 - dpw / Digital Playground +https://www.digitalplaygroundnetwork.com/scene/8353721/tourist-trap-episode-4 - dpw / Digital Playground +https://www.digitalplayground.com/scene/4132651/body-heat-scene-1 - Blockbuster / Digital Playground +https://www.digitalplayground.com/scene/4176641/ass-effect-a-xxx-parody - DP Parody / Digital Playground +https://www.digitalplayground.com/scene/4416872/auditions-part-1 - Dpstar Episodes / Digital Playground +https://www.digitalplayground.com/scene/4187051/bad-babysitter-episode-1 - Episodes / Digital Playground +https://www.digitalplayground.com/scene/4416848/luna-star-in-dp-star-sex-challenge - Dpstar Sex Challenges / Digital Playground +https://www.digitalplayground.com/scene/4489851/bouncer-bitch - Flixxx / Digital Playground +https://www.digitalplayground.com/scene/4186511/ghost-of-blowjobs-past - Rawcut - Digital Playground + +# Erito + +- No special handling required + +https://www.erito.com/scene/4653021/nonstop-sticky-creampie - Erito + +# FakeHub + +- 11 studios listed on StashDB, all scrapable from API +- All performer profiles have 'modelprofile' instead of 'model' between domain and scene ID +- Some studios have their own domain + +https://www.fakehub.com/scene/4411255/punk-rocker-loves-rough-sex - Fake Agent / FakeHub 
+https://www.fakehub.com/scene/4083631/cum-splattered-face-for-petite-teen - Fake Agent UK / FakeHub +https://www.fakehub.com/scene/4087511/unregistered-driver-creampied-by-cop - Fake Cop / FakeHub +https://www.fakehub.com/scene/4474671/students-accidental-tik-tok-bag-challenge - Fake Driving School / FakeHub +https://www.fakehub.com/scene/4084801/holiday-maker-strikes-a-sexual-deal - Fake Hospital / FakeHub +https://www.fakehostel.com/scene/9352861/sudden-threesome-for-new-lovers - Fake Hostel / FakeHub +https://www.faketaxi.com/scene/9642961/cute-brazilian-gives-xmas-tip - Fake Taxi / FakeHub +https://www.fakehub.com/scene/9642841/free-use-mail-order-e-girl - Fakehub Originals / FakeHub +https://www.fakehub.com/scene/4079471/sexy-lesbian-christmas-casting - Female Agent / FakeHub +https://www.fakehub.com/scene/4485911/curvy-driver-gets-a-hard-dicking - Female Fake Taxi / FakeHub +https://www.publicagent.com/scene/4498351/what-is-the-spanish-for-blowjob - Public Agent / FakeHub + +# Gay Wire + +- 18 studios listed on StashDB, 13 scrapable from API + Missing: Bigdaddy, Gay Patrol, Gay Pawn, Grab Ass, Project City Bus + Suspicious: Gay Selector? 
Doesn't seem to belong to Gay Wire + New: Sausage Party, Urban Invasion +- BangBros needs to be replaced with Gay Wire in all studio objects +- bangbros.com needs to be replaced with gaywire.com in all URLs +- Should translate old-style URLs to new-style URLs by checking for redirects: + https://gaywire.com/h1/video487/cock-sucking-galore + https://gaywire.com/scene/9322311/cock-sucking-galore +- Name adjustments: + Its Gonna Hurt -> It's Gonna Hurt + Poundhisass -> Pound His Ass + +https://gaywire.com/scene/9315611/double-ginger - Bait Bus / BangBros +https://gaywire.com/scene/9321281/virgin-anal-sex-with-a-big-dick - Bareback Attack / BangBros +https://gaywire.com/scene/9320721/georgio-fucks-like-an-animal - Bareback Casting / BangBros +https://gaywire.com/scene/9322311/cock-sucking-galore - ExBF / BangBros +https://gaywire.com/scene/9326111/slosh-balls - Haze Him / BangBros +https://gaywire.com/scene/9325601/shockingly-painful - Its Gonna Hurt / BangBros +https://gaywire.com/scene/9329621/public-anal-sex-in-europe - Out In Public / BangBros +https://gaywire.com/scene/8942881/aaron-trainer-trains-his-cock - Poundhisass / BangBros +https://gaywire.com/scene/9335691/oil-massaged-anal-sex - Rub Him / BangBros +https://gaywire.com/scene/9332131/crazy-in-the-club - Sausage Party / BangBros +https://gaywire.com/scene/9332821/hunting-in-the-heart-of-darkness - Thug Hunter / BangBros +https://gaywire.com/scene/9336331/cock-surprise - UngloryHole / BangBros +https://gaywire.com/scene/9333241/two-hot-jocks - Urban Invasion / BangBros + +# Hentai Pros + +- No special handling required + +https://www.hentaipros.com/scene/8359071/girlfriend-stealing-app - Hentai Pros +https://www.hentaiprosnetwork.com/scene/2686971/boin-babes-at-the-resort-2 - Hentai Pros + +# Men.com + +- 10 studios listed on StashDB, all scrapable from API +- Consolidated most studios URLs into men.com: old URLs should be supported, but only men.com should be output + Exception: TwinkPop still has a 
separate domain +- All scene URLs use 'sceneid' instead of 'scene' between domain and scene ID + Exception: TwinkPop uses 'scene' or 'video': we'll stick with 'scene' for consistency +- All performer URLs use 'modelprofile' instead of 'model' between domain and model ID + Exception: TwinkPop uses 'pornstar' +- Name adjustments: + tp -> TwinkPop + Men -> Men.com + +https://www.bigdicksatschool.com/sceneid/4405350/more-spice-than-sugar - Big Dicks At School / Men +https://www.men.com/sceneid/4481271/the-rookie-lifeguard - Drill My Hole / Men +https://www.godsofmen.com/scene/4392307/tied-to-you - Gods of Men / Men +https://www.jizzorgy.com/scene/3707561/the-calendar-shoot - Jizz Orgy / Men +https://www.menofuk.com/scene/3711691/men-in-crack - Men of UK / Men +https://www.men.com/sceneid/9736131/ex-con-hard-on - Men +https://www.str8togay.com/scene/4357781/rodeo-romeo - Str8 to Gay / Men +https://www.thegayoffice.com/scene/3712041/unexpected-revenge - The Gay Office / Men +https://www.toptobottom.com/scene/3716161/the-cleaner - Top to Bottom / Men +https://www.twinkpop.com/video/9634991/sweet-twink-sweat - tp / Men + +# Metro HD + +- 5 studios listed on StashDB, 5 scrapable from API +- All studios have their own domain +- metro.com needs to be replaced with metrohd.com as a fallback +- Name adjustments: + Metro -> Metro HD + +https://www.devianthardcore.com/scene/4414757/sexy-domme-abigail-mac-uses-her-good-submissive-maddy-o-reilly - Deviant Hardcore / Metro +https://www.familyhookups.com/scene/9670731/aidra-fox-and-aubree-valentine - Family Hook Ups / Metro +https://www.girlgrind.com/scene/4471381/latina-babe-daisy-marie-gets-d-pd-with-toys-by-hot-blonde-sammie-rhodes - Girl Grind / Metro +https://www.kinkyspa.com/scene/9638571/nicole-doshi-gets-a-spa-day-for-her-anniversary - Kinky Spa / Metro +https://www.shewillcheat.com/scene/9670841/hot-blonde-lilly-bell-gets-fucked-by-her-marriage-counselor - She Will Cheat / Metro + +# Mile High Media (Straight) + +- Appear to 
segregate their bi / gay / trans content across domains, but according to the API they're all MHM + Have split them across 3 scrapers but could undergo consolidation or further splitting at a later date +- 16 studios listed on StashDB, 12 scrapable from API + Missing: Cherry Pop, Couples Seeking Teens (folded into Reality Junkies?), Gilfed, Mile High Xtreme +- milehigh.com needs to be replaced with milehighmedia.com in all URLs +- milehigh.com can be replaced by the studio's own domain +- Name adjustments: + dlf -> Dilfed + DogHouseDigital -> Doghouse Digital + LesbianOlderYounger -> Lesbian Older Younger + RealityJunkies -> Reality Junkies + SweetSinner -> Sweet Sinner + SweetHeartVideo -> Sweetheart Video + +https://www.dilfed.com/scene/9292051/cheating-bbq - dlf / Mile High Media +https://www.doghousedigital.com/scene/9393951/amazing-tits-14-scene-2-titty-action - DogHouseDigital / Mile High Media +https://www.familysinners.com/scene/9381621/in-laws-2-episode-2-keep-it-in-the-family - Family Sinners / Mile High Media +https://www.milfed.com/scene/4365378/older-women-crave-chicks-02-scene-1 - LesbianOlderYounger / Mile High Media +https://www.milfed.com/scene/9497251/fun-at-the-physio - Milfed / Mile High Media +https://www.realityjunkies.com/scene/9446181/free-use-families-2-scene-3-cant-resist - RealityJunkies / Mile High Media +https://www.sweetsinner.com/scene/9585781/the-voyeur-6-scene-1-sneaky - SweetSinner / Mile High Media +https://www.sweetheartvideo.com/scene/4651721/lesbian-stepmother-7-scene-4 - SweetHeartVideo / Mile High Media + +# Mile High Media (Trans and Bi) + +- milehigh.com needs to be replaced with milehighmedia.com in all URLs +- milehigh.com can be replaced by studio domain +- Name adjustments: + BIEmpire -> Bi Empire + +https://www.biempire.com/scene/4421745/dirty-pictures - BIEmpire / Mile High Media +https://www.transsensual.com/scene/4653681/sheer-panties-cumshot-gets-her-guy - Transsensual / Mile High Media + +# Mile High Media (Gay) + +- 
milehigh.com needs to be replaced with milehighmedia.com in all URLs +- milehigh.com can be replaced by studio domain + +https://www.iconmale.com/scene/4653701/hot-daddies-3-scene-1 - Icon Male / Mile High Media +https://www.taboomale.com/scene/4373622/forgive-me-scene-1 - Icon Male / Mile High Media +https://www.noirmale.com/scene/9635521/christmas-proposal - Noir Male / Mile High Media + +# Why Not Bi + +- Single studio but the parent studio comes back from the API as "WhyNotBy": flattening this into just "Why Not Bi" + +https://www.whynotbi.com/scene/4643151/glory-ous-wet-threesome - Why Not Bi / WhyNotBy + +# Mofos + +- 28 studios listed on StashDB, 21 scrapable from API + Missing: Border Patrol Sex, Can She Take It, Ebony Sex Tapes (folded into Mofos B Sides), Mofos Live, Mofos Old School, Teens At Work +- Some studios have their own domain + Older domains like letstryanal.com, dontbreakme.com should still be supported URLs but not output + +- Name adjustments: + lpi -> Let's Post It + Lets Try Anal -> Let's Try Anal + +https://www.mofos.com/scene/2978471/webcamming-babysitter-learns-to-fuck - Busted Babysitters / Mofos +https://www.dontbreakme.com/scene/4410806/dont-break-aria - Don't Break Me / Mofos +https://www.mofos.com/scene/2984901/country-riding - Drone Hunter / Mofos +https://www.mofos.com/scene/2986081/breaking-entering-lesbian-couple - Girls Gone Pink / Mofos +https://www.iknowthatgirl.com/scene/9670091/influencer-coco-is-a-baddie - I Know That Girl / Mofos +https://www.mofos.com/scene/2982821/gym-class-fuckers - In Gang We Bang / Mofos +https://www.mofos.com/scene/2997891/public-flashing-in-the-street - Latina Sex Tapes / Mofos +https://www.mofos.com/scene/9659071/fuck-buddy-reunion - lpi / Mofos +https://www.letstryanal.com/scene/4620941/gf-loses-game-and-now-she-must-give-up-her-ass - Lets Try Anal / Mofos +https://www.mofos.com/scene/3014421/working-out-aint-working-out - Milfs Like It Black / Mofos 
+https://www.mofos.com/scene/2982291/big-booty-nurse-heals-sick-bf - Mofos B Sides / Mofos +https://www.mofosnetwork.com/scene/3009491/sweet-naomi - MOFOS Lab / Mofos +https://www.mofos.com/scene/3009451/euro-fuckdoll-takes-a-load - Mofos World Wide / Mofos +https://www.mofos.com/scene/4358051/smokeshow - Pervs On Patrol / Mofos +https://www.mofos.com/scene/3027181/riley-reid-doesnt-wear-panties - Pornstar Vote / Mofos +https://www.mofos.com/scene/3027261/petite-teens-rv-fuck - Project RV / Mofos +https://www.mofos.com/scene/4350950/cant-dickline-cash - Pubic Pickups / Mofos +https://www.mofos.com/scene/3037521/boats-n-hoes - Real Slut Party / Mofos +https://www.mofos.com/scene/9483651/new-dick-to-forget-your-ex - Share My BF / Mofos +https://www.mofos.com/scene/3046411/blondes-do-have-all-the-fun - She's A Freak / Mofos +https://www.mofos.com/scene/3218041/stranded-in-my-feelings - Stranded Teens / Mofos +https://www.mofos.com/scene/3051541/hot-brunette-scouts-a-stranger - The Sex Scout / Mofos + +# Property Sex + +- 3 studios listed on StashDB (if we include the "network"), all 3 scrapable from API +- House Humpers has its own domain + +https://www.propertysex.com/scene/4437501/id-be-a-great-roommate - Property Sex +https://www.propertysex.com/scene/4394526/going-that-extra-mile - Property Sex VR / Property Sex +https://househumpers.com/scene/9635471/we-work-better-together - House Humpers / Property Sex + +# Reality Dudes + +- 5 studios listed on StashDB (if we include the "network"), 4 scrapable from API + Missing: Gay Revenge +- realitydudes.com does not show model pages, but URLs are still scrapable +- Papi has its own domain +- Papi uses 'pornstar' instead of 'model' between domain and scene ID +- Papi does not seem to be producing any scenes of their own, so most of their links are actually for Reality Dudes, Sean Cody, Men.com etc. 
The scraper does not account for this and some scenes may need to be rescraped with other Aylo scrapers to get fully correct metadata + +https://www.realitydudes.com/scene/4466271/bedroom-bukkake - Reality Dudes +https://www.realitydudes.com/scene/2839091/cole - Str8 Chaser / Reality Dudes +https://www.realitydudes.com/scene/4475811/fitness-training - Dick Dorm / Reality Dudes +https://www.realitydudesnetwork.com/scene/9509871/malik-delgaty-drills-enzo-mullers-hole - Reality Dudes +https://www.realitydudes.com/scene/2834531/testicle-seduction - Papi / Reality Dudes + +# Reality Kings + +- 59 studios listed on StashDB, 45 scrapable from API + Missing: Bikini Crashers, Black GFs (folded into RK Prime), Cum Girls, Dangerous Dongs, Flower Tucci, GF Revenge (folded into GF Leaks), Project DTF, Real Orgasms, Round and Brown (folded into RK Prime), Saturday Night Latinas, Team Squirt, Top Shelf Pussy, Tranny Surprise (folded into TransHarder?), VIP Crew + 15 missing +- Look At Her Now seems to be in the process of being integrated: + it still has its own domain and some of its scenes come back from the API without Reality Kings as a parent +- Older sites had their own domain and their URLs should be supported but not output +- Name adjustments: + rks -> RK Shorts + +https://www.realitykings.com/scene/27/double-bubbles - 40 Inch Plus / Reality Kings +https://www.8thstreetlatinas.com/scene/2286648/my-secret-latina - 8th Street Latinas / Reality Kings +https://www.realitykings.com/scene/10255/cant-be-serious - Bad Tow Truck / Reality Kings +https://www.bignaturals.com/scene/4412702/second-thoughts - Big Naturals / Reality Kings +https://www.rk.com/scene/1110/career-woman - Big Tits Boss / Reality Kings +https://www.realitykings.com/scene/4442371/semen-sirens-of-the-high-seas - Captain Stabbin / Reality Kings +https://www.realitykings.com/scene/1501/can-you-get-hard - CFNM Secret / Reality Kings +https://www.realitykings.com/scene/2959551/naughty-nyomi - Crazy Asian GFs / 
Reality Kings +https://www.realitykings.com/scene/2957881/deuce-is-wild - Crazy College GFs / Reality Kings +https://www.cumfiesta.com/scene/2483428/cooch-couture - Cum Fiesta / Reality Kings +https://www.daredorm.com/scene/2954551/high-stakes - Dare Dorm / Reality Kings +https://www.eurosexparties.com/scene/10746/loving-to-fuck - Euro Sex Parties / Reality Kings +https://www.realitykings.com/scene/2603/teenie-bikini - Extreme Asses / Reality Kings +https://www.realitykings.com/scene/2667/redheaded-tit-youth - Extreme Naturals / Reality Kings +https://www.realitykings.com/scene/10812/ease-into-elsa - First Time Auditions / Reality Kings +https://www.gfleaks.com/scene/2955251/sexy-stash - GF Leaks / Reality Kings +https://www.realitykings.com/scene/3086/the-soloist - Girls of Naked / Reality Kings +https://www.happytugs.com/scene/3101/marvelous-mandi - Happy Tugs / Reality Kings +https://www.hdlove.com/scene/8376/rocking-remy - HD Love / Reality Kings +https://www.hornybirds.com/scene/2957491/divorcees-revenge - Horny Birds / Reality Kings +https://www.realitykings.com/scene/3194/sweet-puss - Hot Bush / Reality Kings +https://www.hotgirlsgame.com/scene/8805741/anal-obsessed-nerds - Hot Girls Game / Reality Kings +https://www.realitykings.com/scene/3272/doing-doubles - In the VIP / Reality Kings +https://www.lilhumpers.com/scene/4410213/pounding-the-prankster - Lil Humpers / Reality Kings +https://www.lookathernow.com/scene/4413126/taste-tester - Look At Her Now +https://www.realitykings.com/scene/4416606/whos-the-boss-now - Look At Her Now / Reality Kings +https://www.mikeinbrazil.com/scene/9776/sweet-caramel - Mike in Brazil / Reality Kings +https://www.realitykings.com/scene/10808/sexy-zazie - Mike's Apartment / Reality Kings +https://www.milfhunter.com/scene/1853742/showering-her-with-cum - Milf Hunter / Reality Kings +https://www.realitykings.com/scene/5097/in-the-mood - Milf Next Door / Reality Kings 
+https://www.momsbangteens.com/scene/2286917/anal-sex-education - Moms Bang Teens / Reality Kings +https://www.momslickteens.com/scene/2948131/vacation-sensation - Moms Lick Teens / Reality Kings +https://www.moneytalks.com/scene/8790/candy-cooch - Money Talks / Reality Kings +https://www.monstercurves.com/scene/4297681/choose-your-poison-2 - Monster Curves / Reality Kings +https://www.realitykings.com/scene/5606/private-packers - No Faces / Reality Kings +https://www.pure18.com/scene/5708/horny-chloe - Pure 18 / Reality Kings +https://www.recklessinmiami.com/scene/4393751/home-and-away - Reckless in Miami / Reality Kings +https://www.realitykings.com/scene/4654441/mai-i-oil-you-up - RK Prime / Reality Kings +https://www.realitykings.com/scene/9735791/filling-the-gamer-girls-ass - rks / Reality Kings +https://www.realitykings.com/scene/6477/my-lovely-lady - See My Wife / Reality Kings +https://www.sneakysex.com/scene/4410566/booty-camp - Sneaky Sex / Reality Kings +https://www.realitykings.com/scene/10688/juicy-sweet - Street BlowJobs / Reality Kings +https://www.teenslovehugecocks.com/scene/4417629/amazing-avery - Teens Love Huge Cocks / Reality Kings +https://www.welivetogether.com/scene/4414014/wlt-s01e02-new-arrivals - We Live Together / Reality Kings +https://www.realitykings.com/scene/8283/perfect-ten - Wives in Pantyhose / Reality Kings +https://www.workmeharder.com/scene/4655221/fuck-truck - Work Me Harder / Reality Kings + +# Sean Cody + +- Single studio but has some substudio elements? 
Will flatten to just "Sean Cody" with no parent + +https://www.seancody.com/scene/4652961/devy-brock-bareback - Sean Cody +https://www.seancody.com/scene/2711231/daniel-philip-bareback - SC BlackMaleMe / Sean Cody + +# SexyHub + +- 9 studios listed on StashDB, 6 scrapable from API + Missing: BDSM.xxx, HDPOV, Orgasms.XXX +- Some studios have their own domain +- All performer profiles have 'modelprofile' instead of 'model' between domain and scene ID + +https://www.danejones.com/scene/9585371/gamer-fucks-big-tits-italian-gf - Dane Jones / SexyHub +https://www.sexyhub.com/scene/9585341/big-cock-gym-perv-fucks-redhead - Fitness Rooms / SexyHub +https://www.sexyhub.com/scene/4227051/bananas - Girlfriends / SexyHub +https://www.lesbea.com/scene/9289091/orgasms-in-beautiful-lingerie - Lesbea / SexyHub +https://www.sexyhub.com/scene/4411925/deep-orgasms-for-petite-czech-babe - Massage Rooms / SexyHub +https://www.sexyhub.com/scene/9379531/long-legs-facesitting-with-stepmom - Mom XXX / SexyHub + +# Squirted + +- No special handling required + +https://www.squirted.com/scene/4485941/tiffanys-back-for-more - Squirted + +# TransAngels + +- No special handling required + +https://www.transangels.com/scene/2682551/cocked-behind-bars - TransAngels +https://www.transangelsnetwork.com/scene/9510091/strip-searched-and-fucked - TransAngels +https://www.transharder.com/scene/4420814/break-the-burglar - TransHarder + +# True Amateurs + +- No special handling required + +https://www.trueamateurs.com/scene/4333351/footjob-with-black-stockings - True Amateurs + +# Tube8Vip + +- All scenes come back as belonging to the studio Elite, mostly under the parent studio Premium: flatten to just "Tube8Vip" + +https://www.tube8vip.com/scene/2933481/why-everybody-loves-adriana - Elite / Premium + +# Twistys + +- 13 studios listed on StashDB, 11 scrapable from API + Missing: Naughty Staff, Twistys Live +- Blue Fantasies, Busty Ones and Euro Foxes are all mixed up and have confusing metadata in the 
API + This scene with Louise Glover falls under all 3 channels: https://www.twistys.com/scene/3474791/for-all-the-world-to-see + Shows up on the first page for Blue Fantasies: https://www.twistys.com/scenes?site=220 + Shows up on the 19th page for Busty Ones: https://www.twistys.com/scenes?page=19&site=229 + Shows up on the 13th page for Euro Foxes: https://www.twistys.com/scenes?page=13&site=226 + If a scene is in any or all of these collections we will make its studio the first one that matches alphabetically: this particular scene would fall under Blue Fantasies +- Name adjustments: + anettedawn -> Anette Dawn + TwistysHard -> Twistys Hard + whengirlsplay -> When Girls Play + +https://www.twistys.com/scene/3468571/anette-gets-a-naughty - anettedawn / Twistys +https://www.twistys.com/scene/3413521/the-artist-within-part-1 - Feature Film / Twistys +https://www.twistys.com/scene/4281921/dads-new-girlfriend - Mom Knows Best / Twistys +https://www.twistys.com/scene/3436211/wanna-play - Nicole Graves / Twistys +https://www.twistys.com/scene/8366721/let-me-show-you - Turning Twistys / Twistys +https://www.twistys.com/scene/3406841/fix-her-up-her - TwistysHard / Twistys +https://www.twistys.com/scene/3430461/work-those-asses - Twistys Teasers / Twistys +https://www.twistys.com/scene/9420281/lulus-lesbian-squirtfest - whengirlsplay / Twistys + +# VOYR + +- No special handling required + +https://www.voyr.com/scene/9729471/four-way-pleasure - Voyr diff --git a/scrapers/Babes.yml b/scrapers/Babes.yml new file mode 100644 index 0000000..0748b3e --- /dev/null +++ b/scrapers/Babes.yml @@ -0,0 +1,64 @@ +name: Babes +# requires: py_common, AyloAPI +# scrapes: Babes, Babes Unleashed, Black is Better (now Babes Unleashed), Elegant Anal, Office Obsession, Step Mom Lessons +sceneByURL: + - action: script + url: + - babes.com/scene/ + script: + - python + - AyloAPI/scrape.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - babes + - 
scene-by-fragment +sceneByName: + action: script + script: + - python + - AyloAPI/scrape.py + - babes + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - babes + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - AyloAPI/scrape.py + - babes + - performer-by-name +performerByURL: + - action: script + url: + - babes.com/model/ + script: + - python + - AyloAPI/scrape.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - performer-by-fragment +movieByURL: + - action: script + url: + - babes.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - babes.com/scene/ + script: + - python + - AyloAPI/scrape.py + - movie-by-url +# Last Updated January 13, 2024 diff --git a/scrapers/BangBros/BangBros.py b/scrapers/BangBros/BangBros.py new file mode 100644 index 0000000..7fa0462 --- /dev/null +++ b/scrapers/BangBros/BangBros.py @@ -0,0 +1,90 @@ +import json +import sys +from requests import head +from typing import Any +from py_common import log +from py_common.util import dig, replace_all, replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "AvaSpice": "Ava Spice", + "MomIsHorny": "Mom Is Horny", +} + + +def redirect(url: str) -> str: + if not url: + return url + if (res := head(url)) and (redirect := res.headers.get("Location", url)): + return redirect if not redirect.endswith("404") else url + return url + + +def bangbros(obj: Any, _) -> Any: + domain = ( + "virtualporn.com" + if dig(obj, "studio", "name") == "Virtual Porn" + else "bangbros.com" + ) + + # All bangbros URLs omit the standard www. 
subdomain prefix + # and all scene URLs use /video/ instead of the standard /scene/ + fixed = replace_all( + obj, + "url", + lambda x: x.replace("/scene/", "/video/").replace("www.bangbros.com", domain), + ) + + # Rename certain studios according to the map + fixed = replace_at( + fixed, "studio", "name", replacement=lambda x: studio_map.get(x, x) + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "bangbros", + "virtualporn", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + url = redirect(url) + result = scene_from_url(url, postprocess=bangbros) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=bangbros) + case "scene-by-fragment" | "scene-by-query-fragment", args: + args = replace_all(args, "url", redirect) + result = scene_from_fragment( + args, search_domains=domains, postprocess=bangbros + ) + case "performer-by-url", {"url": url}: + url = redirect(url) + result = performer_from_url(url, postprocess=bangbros) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=bangbros + ) + case "movie-by-url", {"url": url} if url: + url = redirect(url) + result = movie_from_url(url, postprocess=bangbros) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/BangBros/BangBros.yml b/scrapers/BangBros/BangBros.yml new file mode 100644 index 0000000..4be974a --- /dev/null +++ b/scrapers/BangBros/BangBros.yml @@ -0,0 +1,60 @@ +name: BangBros +# requires: py_common, AyloAPI +# scrapes: Ass Parade, Ava Spice, Back Room Facials, Backroom MILF, Ball Honeys, Bang Bus, Bang Casting, Bang POV, Bang Tryouts, BangBros 18, BangBros Angels, Bangbros Clips, BangBros Remastered, BangBros Vault, Big Mouthfuls, Big Tit 
Cream Pie, Big Tits, Round Asses, BlowJob Fridays, Blowjob Ninjas, Boob Squad, Brown Bunnies, Can He Score, Casting, Chongas, Colombia Fuck Fest, Dirty World Tour, Dorm Invasion, Facial Fest, Fuck Team Five, Glory Hole Loads, Latina Rampage, Living With Anna, MILF Lessons, Milf Soup, Magical Feet, Mom Is Horny, Monsters of Cock, Mr CamelToe, Mr. Anal, My Dirty Maid, My Life In Brazil, Newbie Black, Pawg, Party of 3, Penny Show, Porn Star Spa, Power Munch, Public Bang, Slutty White Girls, Stepmom Videos, Street Ranger, Tugjobs, Virtual Porn, Working Latinas +sceneByURL: + - action: script + url: + - bangbros.com/video + - virtualporn.com/video + script: + - python + - BangBros.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - BangBros.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - BangBros.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - BangBros.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - BangBros.py + - performer-by-name +performerByURL: + - action: script + url: + - bangbros.com/model/ + script: + - python + - BangBros.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - BangBros.py + - performer-by-fragment +movieByURL: + - action: script + url: + - bangbros.com/movie + - virtualporn.com/movie + script: + - python + - BangBros.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/Brazzers/Brazzers.py b/scrapers/Brazzers/Brazzers.py new file mode 100644 index 0000000..b6372b4 --- /dev/null +++ b/scrapers/Brazzers/Brazzers.py @@ -0,0 +1,77 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all, replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + 
+studio_map = { + "JugFuckers": "Jug Fuckers", + "Shes Gonna Squirt": "She's Gonna Squirt", +} + + +def bangbros(obj: Any, _) -> Any: + # All brazzers URLs use /video/ instead of the standard /scene/ + # and /pornstar/ instead of the standard /model + fixed = replace_all( + obj, + "url", + lambda x: x.replace("/scene/", "/video/").replace("/model/", "/pornstar/"), + ) + + # Rename certain studios according to the map + fixed = replace_at( + fixed, "studio", "name", replacement=lambda x: studio_map.get(x, x) + ) + + # Brazzers Live special case: if the scene has the tag "Brazzers Live" we need to set the studio name to "Brazzers Live" + if any(t["name"] == "Brazzers Live" for t in dig(obj, "tags", default=[])): + fixed = replace_at( + fixed, "studio", "name", replacement=lambda _: "Brazzers Live" + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "brazzers", + "brazzersnetwork", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=bangbros) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=bangbros) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=bangbros + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=bangbros) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=bangbros + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=bangbros) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/Brazzers/Brazzers.yml b/scrapers/Brazzers/Brazzers.yml new file mode 100644 index 0000000..05b7b60 
--- /dev/null +++ b/scrapers/Brazzers/Brazzers.yml @@ -0,0 +1,61 @@ +name: Brazzers +# requires: py_common, AyloAPI +# scrapes: Asses in Public, Baby Got Boobs, Big Butts Like It Big, Big Tits at School, Big Tits at Work, Big Tits In Sports, Big Tits In Uniform, Big Wet Butts, BrazzersExxtra, Brazzers Live, Brazzers Vault, Busty & Real, Bustyz, CFNM, Day With A Pornstar, Dirty Masseur, Doctor Adventures, Hot And Mean, Hot Chicks Big Asses, JugFuckers, Milfs Like It Big, Mommy Got Boobs, Moms in control, Pornstars Like it Big, Real Wife Stories, Shes Gonna Squirt, Teens Like It Big, ZZ Series +sceneByURL: + - action: script + url: + - brazzers.com/video + - brazzersnetwork.com/video + script: + - python + - Brazzers.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - Brazzers.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - Brazzers.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - Brazzers.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - Brazzers.py + - performer-by-name +performerByURL: + - action: script + url: + - brazzers.com/pornstar/ + - brazzersnetwork.com/pornstar/ + script: + - python + - Brazzers.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - Brazzers.py + - performer-by-fragment +movieByURL: + - action: script + url: + - brazzers.com/movie + - brazzersnetwork.com/movie + script: + - python + - Brazzers.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/Bromo/Bromo.py b/scrapers/Bromo/Bromo.py new file mode 100644 index 0000000..7fe4d96 --- /dev/null +++ b/scrapers/Bromo/Bromo.py @@ -0,0 +1,51 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + 
performer_from_fragment, + performer_search, + movie_from_url, +) + + +def bromo(obj: Any, _) -> Any: + # Flatten all studios to just "Bromo" + return replace_at(obj, "studio", replacement=lambda _: {"name": "Bromo"}) + + +if __name__ == "__main__": + domains = [ + "bromo", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=bromo) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=bromo) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=bromo + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=bromo) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search(name, search_domains=domains, postprocess=bromo) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=bromo) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/Bromo/Bromo.yml b/scrapers/Bromo/Bromo.yml new file mode 100644 index 0000000..d3ed51c --- /dev/null +++ b/scrapers/Bromo/Bromo.yml @@ -0,0 +1,60 @@ +name: Bromo +# requires: py_common, AyloAPI +# scrapes: Bromo +sceneByURL: + - action: script + url: + - bromo.com/scene/ + script: + - python + - Bromo.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - Bromo.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - Bromo.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - Bromo.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - Bromo.py + - performer-by-name +performerByURL: + - action: script + url: + - bromo.com/model/ + 
script: + - python + - Bromo.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - Bromo.py + - performer-by-fragment +movieByURL: + - action: script + url: + - bromo.com/scene/ + # Since scenes link to the movie we can scrape movies from scenes + - bromo.com/movie/ + script: + - python + - Bromo.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/CzechHunter/CzechHunter.py b/scrapers/CzechHunter/CzechHunter.py new file mode 100644 index 0000000..889b36d --- /dev/null +++ b/scrapers/CzechHunter/CzechHunter.py @@ -0,0 +1,69 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def czechhunter(obj: Any, _) -> Any: + replacement = None + match dig(obj, "studio", "name"): + case "Czech Hunter": + replacement = "czechhunter.com" + case "Debt Dandy": + replacement = "debtdandy.com" + case "Dirty Scout": + replacement = "dirtyscout.com" + case _: + # This will never be correct, but I don't see a better way to handle it + replacement = "bigstr.com" + + # Replace the studio name in all URLs + fixed = replace_all(obj, "url", lambda x: x.replace("bigstr.com", replacement)) + + return fixed + + +if __name__ == "__main__": + domains = [ + "czechhunter", + "debtdandy", + "dirtyscout", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=czechhunter) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=czechhunter) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=czechhunter + ) + case "performer-by-url", {"url": 
url}: + result = performer_from_url(url, postprocess=czechhunter) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=czechhunter + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=czechhunter) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/CzechHunter/CzechHunter.yml b/scrapers/CzechHunter/CzechHunter.yml new file mode 100644 index 0000000..990c1f5 --- /dev/null +++ b/scrapers/CzechHunter/CzechHunter.yml @@ -0,0 +1,68 @@ +name: Czech Hunter (BigStr) +# requires: py_common, AyloAPI +# scrapes: Czech Hunter, Debt Dandy, Dirty Scout +sceneByURL: + - action: script + url: + - czechhunter.com/scene/ + - debtdandy.com/scene/ + - dirtyscout.com/scene/ + script: + - python + - CzechHunter.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - CzechHunter.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - CzechHunter.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - CzechHunter.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - CzechHunter.py + - performer-by-name +performerByURL: + - action: script + url: + - czechhunter.com/model/ + - debtdandy.com/model/ + - dirtyscout.com/model/ + script: + - python + - CzechHunter.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - CzechHunter.py + - performer-by-fragment +movieByURL: + - action: script + url: + - czechhunter.com/scene/ + - debtdandy.com/scene/ + - dirtyscout.com/scene/ + # Since scenes link to the movie we can scrape movies from scenes + - czechhunter.com/movie/ + - debtdandy.com/movie/ + - dirtyscout.com/movie/ + script: + - python + - CzechHunter.py + - movie-by-url +# 
Last Updated January 14, 2024 diff --git a/scrapers/Deviante/Deviante.py b/scrapers/Deviante/Deviante.py new file mode 100644 index 0000000..43df5a2 --- /dev/null +++ b/scrapers/Deviante/Deviante.py @@ -0,0 +1,89 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "es": "Erotic Spice", + "fmf": "Forgive Me Father", + "lha": "Love Her Ass", + "pdt": "Pretty Dirty Teens", + "sw": "Sex Working", +} + + +def deviante(obj: Any, _) -> Any: + fixed = replace_all(obj, "name", replacement=lambda x: studio_map.get(x, x)) + + replacement = None + match dig(fixed, "studio", "name"): + case "Erotic Spice": + replacement = "eroticspice.com" + case "Forgive Me Father": + replacement = "forgivemefather.com" + case "Love Her Ass": + replacement = "loveherass.com" + case "Pretty Dirty Teens": + replacement = "prettydirtyteens.com" + case "Sex Working": + replacement = "sexworking.com" + case _: + replacement = "deviante.com" + + # All deviante URLs use /video/ instead of the standard /scene/ + # and also have separate domains per studio + fixed = replace_all( + fixed, + "url", + lambda x: x.replace("/scene/", "/video/").replace("deviante.com", replacement), + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "eroticspice", + "forgivemefather", + "loveherass", + "prettydirtyteens", + "sexworking", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=deviante) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=deviante) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, 
search_domains=domains, postprocess=deviante + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=deviante) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=deviante + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=deviante) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/Deviante/Deviante.yml b/scrapers/Deviante/Deviante.yml new file mode 100644 index 0000000..e7537a0 --- /dev/null +++ b/scrapers/Deviante/Deviante.yml @@ -0,0 +1,80 @@ +name: Deviante +# requires: py_common, AyloAPI +# scrapes: Erotic Spice, Forgive Me Father, Love Her Ass, Pretty Dirty Teens, Sex Working +sceneByURL: + - action: script + url: + - deviante.com/video/ + - eroticspice.com/video/ + - forgivemefather.com/video/ + - loveherass.com/video/ + - prettydirtyteens.com/video/ + - sexworking.com/video/ + script: + - python + - Deviante.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - Deviante.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - Deviante.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - Deviante.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - Deviante.py + - performer-by-name +performerByURL: + - action: script + url: + - deviante.com/model/ + - eroticspice.com/model/ + - forgivemefather.com/model/ + - loveherass.com/model/ + - prettydirtyteens.com/model/ + - sexworking.com/model/ + script: + - python + - Deviante.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - Deviante.py + - performer-by-fragment +movieByURL: + - action: script + url: + - deviante.com/movie/ + - 
eroticspice.com/movie/ + - forgivemefather.com/movie/ + - loveherass.com/movie/ + - prettydirtyteens.com/movie/ + - sexworking.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - deviante.com/video/ + - eroticspice.com/video/ + - forgivemefather.com/video/ + - loveherass.com/video/ + - prettydirtyteens.com/video/ + - sexworking.com/video/ + script: + - python + - Deviante.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/DigitalPlayground/DigitalPlayground.py b/scrapers/DigitalPlayground/DigitalPlayground.py new file mode 100644 index 0000000..2319114 --- /dev/null +++ b/scrapers/DigitalPlayground/DigitalPlayground.py @@ -0,0 +1,61 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "dpw": "DP World", + "Dpstar Episodes": "Episodes", + "Dpstar Sex Challenges": "Sex Challenges", +} + + +def digitalplayground(obj: Any, _) -> Any: + return replace_at(obj, "studio", "name", replacement=lambda x: studio_map.get(x, x)) + + +if __name__ == "__main__": + domains = [ + "bangbros", + "virtualporn", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=digitalplayground) + case "scene-by-name", {"name": name} if name: + result = scene_search( + name, search_domains=domains, postprocess=digitalplayground + ) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=digitalplayground + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=digitalplayground) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + 
case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=digitalplayground + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=digitalplayground) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/DigitalPlayground/DigitalPlayground.yml b/scrapers/DigitalPlayground/DigitalPlayground.yml new file mode 100644 index 0000000..218571f --- /dev/null +++ b/scrapers/DigitalPlayground/DigitalPlayground.yml @@ -0,0 +1,64 @@ +name: Digital Playground +# requires: py_common, AyloAPI +# scrapes: Digital Playground, DP Parodies, DP World, Episodes, Flixxx, Rawcut +sceneByURL: + - action: script + url: + - digitalplayground.com/scene/ + - digitalplaygroundnetwork.com/scene/ + script: + - python + - DigitalPlayground.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - DigitalPlayground.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - DigitalPlayground.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - DigitalPlayground.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - DigitalPlayground.py + - performer-by-name +performerByURL: + - action: script + url: + - digitalplayground.com/model/ + - digitalplaygroundnetwork.com/model/ + script: + - python + - DigitalPlayground.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - DigitalPlayground.py + - performer-by-fragment +movieByURL: + - action: script + url: + - digitalplayground.com/movie/ + - digitalplaygroundnetwork.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - digitalplayground.com/scene/ + - digitalplaygroundnetwork.com/scene/ + script: + - python + - DigitalPlayground.py + - movie-by-url +# Last Updated January 14, 2024 diff --git 
a/scrapers/Erito.yml b/scrapers/Erito.yml new file mode 100644 index 0000000..b0cf113 --- /dev/null +++ b/scrapers/Erito.yml @@ -0,0 +1,64 @@ +name: Erito +# requires: py_common, AyloAPI +# scrapes: Erito +sceneByURL: + - action: script + url: + - erito.com/scene/ + script: + - python + - AyloAPI/scrape.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - erito + - scene-by-fragment +sceneByName: + action: script + script: + - python + - AyloAPI/scrape.py + - erito + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - erito + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - AyloAPI/scrape.py + - erito + - performer-by-name +performerByURL: + - action: script + url: + - erito.com/model/ + script: + - python + - AyloAPI/scrape.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - performer-by-fragment +movieByURL: + - action: script + url: + - erito.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - erito.com/scene/ + script: + - python + - AyloAPI/scrape.py + - movie-by-url +# Last Updated January 13, 2024 diff --git a/scrapers/FakeHub/FakeHub.py b/scrapers/FakeHub/FakeHub.py new file mode 100644 index 0000000..44b4903 --- /dev/null +++ b/scrapers/FakeHub/FakeHub.py @@ -0,0 +1,74 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def fakehub(obj: Any, _) -> Any: + replacement = None + match dig(obj, "studio", "name"): + case "Fake Hostel": + replacement = "fakehostel.com" + case "Fake Taxi": + replacement = "faketaxi.com" + case "Public Agent": + replacement = "publicagent.com" + 
case _: + replacement = "fakehub.com" + + # All FakeHub performer URLs use /modelprofile/ instead of the standard /model/ + # and some studios have their own domains + fixed = replace_all( + obj, + "url", + lambda x: x.replace("/model/", "/modelprofile/").replace( + "fakehub.com", replacement + ), + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "fakehub", + "fakehostel", + "faketaxi", + "publicagent", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=fakehub) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=fakehub) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=fakehub + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=fakehub) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search(name, search_domains=domains, postprocess=fakehub) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=fakehub) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/FakeHub/FakeHub.yml b/scrapers/FakeHub/FakeHub.yml new file mode 100644 index 0000000..b9e9533 --- /dev/null +++ b/scrapers/FakeHub/FakeHub.yml @@ -0,0 +1,72 @@ +name: FakeHub +# requires: py_common, AyloAPI +# scrapes: Fake Agent, Fake Agent UK, Fake Cop, Fake Driving School, Fake Hospital, Fake Hostel, Fake Taxi, Fakehub Originals, Female Agent, Female Fake Taxi, Public Agent +sceneByURL: + - action: script + url: + - fakehub.com/scene/ + - fakehostel.com/scene/ + - faketaxi.com/scene/ + - publicagent.com/scene/ + script: + - python + - FakeHub.py + - scene-by-url +sceneByFragment: + 
action: script + script: + - python + - FakeHub.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - FakeHub.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - FakeHub.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - FakeHub.py + - performer-by-name +performerByURL: + - action: script + url: + - fakehub.com/modelprofile/ + - fakehostel.com/modelprofile/ + - faketaxi.com/modelprofile/ + - publicagent.com/modelprofile/ + script: + - python + - FakeHub.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - FakeHub.py + - performer-by-fragment +movieByURL: + - action: script + url: + - fakehub.com/movie/ + - fakehostel.com/movie/ + - faketaxi.com/movie/ + - publicagent.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - fakehub.com/scene/ + - fakehostel.com/scene/ + - faketaxi.com/scene/ + - publicagent.com/scene/ + script: + - python + - FakeHub.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/GayWire/GayWire.py b/scrapers/GayWire/GayWire.py new file mode 100644 index 0000000..3958ccc --- /dev/null +++ b/scrapers/GayWire/GayWire.py @@ -0,0 +1,85 @@ +import json +import sys +from requests import head +from typing import Any +from py_common import log +from py_common.util import replace_all, replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "Its Gonna Hurt": "It's Gonna Hurt", + "Poundhisass": "Pound His Ass", +} + + +def redirect(url: str) -> str: + if not url: + return url + if (res := head(url)) and (redirect := res.headers.get("Location", url)): + return redirect if not redirect.endswith("404") else url + return url + + +def gaywire(obj: Any, _) -> Any: + if obj is None: + return None + + # API returns 
Gay Wire substudios as bangbros.com + fixed = replace_all( + obj, + "url", + lambda x: x.replace("www.bangbros.com", "gaywire.com"), + ) + + # Rename certain studios according to the map + fixed = replace_at( + fixed, "studio", "name", replacement=lambda x: studio_map.get(x, x) + ) + + fixed = replace_at( + fixed, "studio", "parent", "name", replacement=lambda x: "Gay Wire" + ) + + return fixed + + +if __name__ == "__main__": + domains = ["gaywire"] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + url = redirect(url) + result = scene_from_url(url, postprocess=gaywire) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=gaywire) + case "scene-by-fragment" | "scene-by-query-fragment", args: + args = replace_all(args, "url", redirect) + result = scene_from_fragment( + args, search_domains=domains, postprocess=gaywire + ) + case "performer-by-url", {"url": url}: + url = redirect(url) + result = performer_from_url(url, postprocess=gaywire) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search(name, search_domains=domains, postprocess=gaywire) + case "movie-by-url", {"url": url} if url: + url = redirect(url) + result = movie_from_url(url, postprocess=gaywire) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/GayWire/GayWire.yml b/scrapers/GayWire/GayWire.yml new file mode 100644 index 0000000..276affa --- /dev/null +++ b/scrapers/GayWire/GayWire.yml @@ -0,0 +1,58 @@ +name: Gay Wire +# requires: py_common, AyloAPI +# scrapes: Bait Bus, Bareback Attack, Bareback Casting, ExBF, Haze Him, It's Gonna Hurt, Out In Public, Pound His Ass, Rub Him, Sausage Party, Thug Hunter, UngloryHole, Urban Invasion +sceneByURL: + - action: script + url: + - gaywire.com/video + 
script: + - python + - GayWire.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - GayWire.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - GayWire.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - GayWire.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - GayWire.py + - performer-by-name +performerByURL: + - action: script + url: + - gaywire.com/model/ + script: + - python + - GayWire.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - GayWire.py + - performer-by-fragment +movieByURL: + - action: script + url: + - gaywire.com/movie + script: + - python + - GayWire.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/HentaiPros.yml b/scrapers/HentaiPros.yml new file mode 100644 index 0000000..a74454e --- /dev/null +++ b/scrapers/HentaiPros.yml @@ -0,0 +1,64 @@ +name: Hentai Pros +# requires: py_common, AyloAPI +# scrapes: Hentai Pros +sceneByURL: + - action: script + url: + - hentaipros.com/scene/ + script: + - python + - AyloAPI/scrape.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - hentaipros + - scene-by-fragment +sceneByName: + action: script + script: + - python + - AyloAPI/scrape.py + - hentaipros + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - hentaipros + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - AyloAPI/scrape.py + - hentaipros + - performer-by-name +performerByURL: + - action: script + url: + - hentaipros.com/model/ + script: + - python + - AyloAPI/scrape.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - performer-by-fragment +movieByURL: + - action: script + url: + - hentaipros.com/movie/ + # Since scenes link to the movie we can scrape movies from 
scenes + - hentaipros.com/scene/ + script: + - python + - AyloAPI/scrape.py + - movie-by-url +# Last Updated January 13, 2024 diff --git a/scrapers/Men/Men.py b/scrapers/Men/Men.py new file mode 100644 index 0000000..6587f1d --- /dev/null +++ b/scrapers/Men/Men.py @@ -0,0 +1,82 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all, replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "tp": "TwinkPop", + "Men": "Men.com", +} + + +def men(obj: Any, _) -> Any: + fixed = replace_at( + obj, "studio", "name", replacement=lambda x: studio_map.get(x, x) + ) + fixed = replace_at( + fixed, "studio", "parent", "name", replacement=lambda x: studio_map.get(x, x) + ) + + # TwinkPop is the only special case for now + studio_name = dig(fixed, "studio", "name") + scene = "/sceneid/" if studio_name != "TwinkPop" else "/scene/" + model = "/modelprofile/" if studio_name != "TwinkPop" else "/pornstar/" + domain = "men.com" if studio_name != "TwinkPop" else "twinkpop.com" + + fixed = replace_all( + fixed, + "url", + lambda x: x.replace("/scene/", scene) + .replace("/model/", model) + .replace("men.com", domain), + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "men", + "bigdicksatschool", + "godsofmen", + "jizzorgy", + "menofuk", + "str8togay", + "thegayoffice", + "toptobottom", + "twinkpop", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=men) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=men) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment(args, search_domains=domains, postprocess=men) + case "performer-by-url", 
{"url": url}: + result = performer_from_url(url, postprocess=men) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search(name, search_domains=domains, postprocess=men) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=men) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/Men/Men.yml b/scrapers/Men/Men.yml new file mode 100644 index 0000000..850083f --- /dev/null +++ b/scrapers/Men/Men.yml @@ -0,0 +1,100 @@ +name: Men.com +# requires: py_common, AyloAPI +# scrapes: Big Dicks At School, Drill My Hole, Gods of Men, Jizz Orgy, Men of UK, Men.com, Str8 to Gay, The Gay Office, Top to Bottom, TwinkPop +sceneByURL: + - action: script + url: + - \/men.com/scene + - www.men.com/scene + - mennetwork.com/scene + - bigdicksatschool.com/scene/ + - godsofmen.com/scene/ + - jizzorgy.com/scene/ + - menofuk.com/scene/ + - str8togay.com/scene/ + - thegayoffice.com/scene/ + - toptobottom.com/scene/ + - twinkpop.com/video/ + script: + - python + - Men.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - Men.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - Men.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - Men.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - Men.py + - performer-by-name +performerByURL: + - action: script + url: + - \/men.com/modelprofile/ + - www.men.com/modelprofile/ + - mennetwork.com/modelprofile/ + - bigdicksatschool.com/model/ + - godsofmen.com/model/ + - jizzorgy.com/model/ + - menofuk.com/model/ + - str8togay.com/model/ + - thegayoffice.com/model/ + - toptobottom.com/model/ + - twinkpop.com/pornstar/ + script: + - python + - Men.py + - performer-by-url +performerByFragment: + action: script + 
script: + - python + - Men.py + - performer-by-fragment +movieByURL: + - action: script + url: + - \/men.com/movie + - www.men.com/movie/ + - mennetwork.com/movie + - bigdicksatschool.com/movie/ + - godsofmen.com/movie/ + - jizzorgy.com/movie/ + - menofuk.com/movie/ + - str8togay.com/movie/ + - thegayoffice.com/movie/ + - toptobottom.com/movie/ + - twinkpop.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - \/men.com/scene + - www.men.com/scene + - mennetwork.com/scene + - bigdicksatschool.com/scene/ + - godsofmen.com/scene/ + - jizzorgy.com/scene/ + - menofuk.com/scene/ + - str8togay.com/scene/ + - thegayoffice.com/scene/ + - toptobottom.com/scene/ + - twinkpop.com/video/ + script: + - python + - Men.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/MetroHD/MetroHD.py b/scrapers/MetroHD/MetroHD.py new file mode 100644 index 0000000..a6af6ca --- /dev/null +++ b/scrapers/MetroHD/MetroHD.py @@ -0,0 +1,85 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all, replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def metrohd(obj: Any, _) -> Any: + replacement = None + match dig(obj, "studio", "name"): + case "Deviant Hardcore": + replacement = "devianthardcore.com" + case "Family Hook Ups": + replacement = "familyhookups.com" + case "Girl Grind": + replacement = "girlgrind.com" + case "Kinky Spa": + replacement = "kinkyspa.com" + case "She Will Cheat": + replacement = "shewillcheat.com" + case _: + replacement = "metrohd.com" + + # Replace the studio name in all URLs: even if there's no specific studio, + # metro.com is wrong and needs to be replaced with metrohd.com + fixed = replace_all(obj, "url", lambda x: x.replace("metro.com", replacement)) + + # The API returns Metro as a studio name 
but we know them as Metro HD + fixed = replace_at( + fixed, "studio", "name", replacement=lambda x: x.replace("Metro", "Metro HD") + ) + fixed = replace_at( + fixed, + "studio", + "parent", + "name", + replacement=lambda x: x.replace("Metro", "Metro HD"), + ) + return fixed + + +if __name__ == "__main__": + domains = [ + "devianthardcore", + "familyhookups", + "girlgrind", + "kinkyspa", + "shewillcheat", + "metrohd", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=metrohd) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=metrohd) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=metrohd + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=metrohd) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search(name, search_domains=domains, postprocess=metrohd) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=metrohd) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/MetroHD/MetroHD.yml b/scrapers/MetroHD/MetroHD.yml new file mode 100644 index 0000000..364c575 --- /dev/null +++ b/scrapers/MetroHD/MetroHD.yml @@ -0,0 +1,73 @@ +name: Metro HD +# requires: py_common, AyloAPI +# scrapes: Metro HD, Deviant Hardcore, Girl Grind, Kinky Spa, She Will Cheat, Family Hookups +sceneByURL: + - action: script + url: + - metrohd.com/scene/ + - devianthardcore.com/scene/ + - familyhookups.com/scene/ + - girlgrind.com/scene/ + - kinkyspa.com/scene/ + - shewillcheat.com/scene/ + script: + - python + - MetroHD.py + - scene-by-url +sceneByFragment: + action: script + script: 
+ - python + - MetroHD.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - MetroHD.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - MetroHD.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - MetroHD.py + - performer-by-name +performerByURL: + - action: script + url: + - metrohd.com/model/ + - devianthardcore.com/model/ + - familyhookups.com/model/ + - girlgrind.com/model/ + - kinkyspa.com/model/ + - shewillcheat.com/model/ + script: + - python + - MetroHD.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - MetroHD.py + - performer-by-fragment +movieByURL: + - action: script + url: + - metrohd.com/movie/ + - devianthardcore.com/movie/ + - familyhookups.com/movie/ + - girlgrind.com/movie/ + - kinkyspa.com/movie/ + - shewillcheat.com/movie/ + script: + - python + - MetroHD.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.py b/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.py new file mode 100644 index 0000000..d6fdb8a --- /dev/null +++ b/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.py @@ -0,0 +1,73 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "BIEmpire": "Bi Empire", +} + + +def milehigh(obj: Any, _) -> Any: + fixed = replace_all(obj, "name", replacement=lambda x: studio_map.get(x, x)) + + replacement = None + match dig(fixed, "studio", "name"): + case "Bi Empire": + replacement = "biempire.com" + case "Transsensual": + replacement = "transsensual.com" + case _: + replacement = "milehighmedia.com" + + # Replace the studio name in all URLs: 
even if there's no specific studio, + # milehigh.com is wrong and needs to be replaced with milehighmedia.com + fixed = replace_all(fixed, "url", lambda x: x.replace("milehigh.com", replacement)) + + return fixed + + +if __name__ == "__main__": + domains = [ + "milehighmedia", + "biempire", + "transsensual", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=milehigh) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=milehigh) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=milehigh + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=milehigh) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=milehigh + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=milehigh) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.yml b/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.yml new file mode 100644 index 0000000..9123781 --- /dev/null +++ b/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.yml @@ -0,0 +1,64 @@ +name: Mile High Media (Bi and Trans) +# requires: py_common, AyloAPI +# scrapes: Bi Empire, Transsensual +sceneByURL: + - action: script + url: + - biempire.com/scene/ + - transsensual.com/scene/ + script: + - python + - MileHighMedia_BiandTrans.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - MileHighMedia_BiandTrans.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - 
MileHighMedia_BiandTrans.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - MileHighMedia_BiandTrans.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - MileHighMedia_BiandTrans.py + - performer-by-name +performerByURL: + - action: script + url: + - biempire.com/model/ + - transsensual.com/model/ + script: + - python + - MileHighMedia_BiandTrans.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - MileHighMedia_BiandTrans.py + - performer-by-fragment +movieByURL: + - action: script + url: + - biempire.com/movie/ + - transsensual.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - biempire.com/scene/ + - transsensual.com/scene/ + script: + - python + - MileHighMedia_BiandTrans.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.py b/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.py new file mode 100644 index 0000000..f0dab98 --- /dev/null +++ b/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.py @@ -0,0 +1,67 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def milehigh(obj: Any, _) -> Any: + replacement = None + match dig(obj, "studio", "name"): + case "Icon Male": + replacement = "iconmale.com" + case "Noir Male": + replacement = "noirmale.com" + case _: + replacement = "milehighmedia.com" + + # Replace the studio name in all URLs: even if there's no specific studio, + # milehigh.com is wrong and needs to be replaced with milehighmedia.com + fixed = replace_all(obj, "url", lambda x: x.replace("milehigh.com", replacement)) + + return fixed + + +if __name__ == "__main__": + domains = [ + "iconmale", + 
"noirmale", + "taboomale", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=milehigh) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=milehigh) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=milehigh + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=milehigh) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=milehigh + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=milehigh) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.yml b/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.yml new file mode 100644 index 0000000..7796f74 --- /dev/null +++ b/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.yml @@ -0,0 +1,64 @@ +name: Mile High Media (Gay) +# requires: py_common, AyloAPI +# scrapes: Icon Male, Noir Male, Taboo Male +sceneByURL: + - action: script + url: + - iconmale.com/scene/ + - noirmale.com/scene/ + script: + - python + - MileHighMedia_Gay.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - MileHighMedia_Gay.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - MileHighMedia_Gay.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - MileHighMedia_Gay.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - MileHighMedia_Gay.py + - performer-by-name +performerByURL: + - action: script + url: + - iconmale.com/model/ + - 
noirmale.com/model/ + script: + - python + - MileHighMedia_Gay.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - MileHighMedia_Gay.py + - performer-by-fragment +movieByURL: + - action: script + url: + - iconmale.com/movie/ + - noirmale.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - iconmale.com/scene/ + - noirmale.com/scene/ + script: + - python + - MileHighMedia_Gay.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/MileHighMedia_Straight/MileHighMedia_Straight.py b/scrapers/MileHighMedia_Straight/MileHighMedia_Straight.py new file mode 100644 index 0000000..8fa6987 --- /dev/null +++ b/scrapers/MileHighMedia_Straight/MileHighMedia_Straight.py @@ -0,0 +1,93 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "dlf": "Dilfed", + "DogHouseDigital": "Doghouse Digital", + "LesbianOlderYounger": "Lesbian Older Younger", + "SweetHeartVideo": "Sweetheart Video", + "SweetSinner": "Sweet Sinner", + "RealityJunkies": "Reality Junkies", +} + + +def milehigh(obj: Any, _) -> Any: + fixed = replace_all(obj, "name", replacement=lambda x: studio_map.get(x, x)) + + replacement = None + match dig(fixed, "studio", "name"): + case "Dilfed": + replacement = "dilfed.com" + case "Doghouse Digital": + replacement = "doghousedigital.com" + case "Family Sinners": + replacement = "familysinners.com" + case "Milfed" | "Lesbian Older Younger": + replacement = "milfed.com" + case "Reality Junkies": + replacement = "realityjunkies.com" + case "Sweet Sinner": + replacement = "sweetsinner.com" + case "Sweetheart Video": + replacement = "sweetheartvideo.com" + case _: + replacement = "milehighmedia.com" + + # Replace the 
studio name in all URLs: even if there's no specific studio, + # milehigh.com is wrong and needs to be replaced with milehighmedia.com + fixed = replace_all(fixed, "url", lambda x: x.replace("milehigh.com", replacement)) + + return fixed + + +if __name__ == "__main__": + domains = [ + "milehighmedia", + "dilfed", + "doghousedigital", + "familysinners", + "milfed", + "realityjunkies", + "sweetsinner", + "sweetheartvideo", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=milehigh) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=milehigh) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=milehigh + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=milehigh) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=milehigh + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=milehigh) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/MileHighMedia_Straight/MileHighMedia_Straight.yml b/scrapers/MileHighMedia_Straight/MileHighMedia_Straight.yml new file mode 100644 index 0000000..156203b --- /dev/null +++ b/scrapers/MileHighMedia_Straight/MileHighMedia_Straight.yml @@ -0,0 +1,88 @@ +name: Mile High Media (Straight) +# requires: py_common, AyloAPI +# scrapes: Dilfed, Doghouse Digital, Family Sinners, Lesbian Older Younger, Mile High Media, Milfed, Reality Junkies, Sweet Sinner, Sweetheart Video +sceneByURL: + - action: script + url: + - milehighmedia.com/scene/ + - dilfed.com/scene/ + - 
doghousedigital.com/scene/ + - familysinners.com/scene/ + - milfed.com/scene/ + - realityjunkies.com/scene/ + - sweetsinner.com/scene/ + - sweetheartvideo.com/scene/ + script: + - python + - MileHighMedia_Straight.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - MileHighMedia_Straight.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - MileHighMedia_Straight.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - MileHighMedia_Straight.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - MileHighMedia_Straight.py + - performer-by-name +performerByURL: + - action: script + url: + - milehighmedia.com/model/ + - dilfed.com/model/ + - doghousedigital.com/model/ + - familysinners.com/model/ + - milfed.com/model/ + - realityjunkies.com/model/ + - sweetsinner.com/model/ + - sweetheartvideo.com/model/ + script: + - python + - MileHighMedia_Straight.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - MileHighMedia_Straight.py + - performer-by-fragment +movieByURL: + - action: script + url: + - milehighmedia.com/movie/ + - dilfed.com/movie/ + - doghousedigital.com/movie/ + - familysinners.com/movie/ + - milfed.com/movie/ + - realityjunkies.com/movie/ + - sweetsinner.com/movie/ + - sweetheartvideo.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - milehighmedia.com/scene/ + - dilfed.com/scene/ + - doghousedigital.com/scene/ + - familysinners.com/scene/ + - milfed.com/scene/ + - realityjunkies.com/scene/ + - sweetsinner.com/scene/ + - sweetheartvideo.com/scene/ + script: + - python + - MileHighMedia_Straight.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/Mofos/Mofos.py b/scrapers/Mofos/Mofos.py new file mode 100644 index 0000000..0b7f0f7 --- /dev/null +++ b/scrapers/Mofos/Mofos.py @@ -0,0 +1,73 @@ +import json +import sys +from typing import Any +from 
py_common import log +from py_common.util import dig, replace_all, replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "lpi": "Let's Post It", + "Lets Try Anal": "Let's Try Anal", +} + + +def mofos(obj: Any, _) -> Any: + # Rename certain studios according to the map + fixed = replace_at( + obj, "studio", "name", replacement=lambda x: studio_map.get(x, x) + ) + + domain = None + match dig(fixed, "studio", "name"): + case "I Know That Girl": + domain = "iknowthatgirl.com" + case _: + domain = "mofos.com" + + fixed = replace_all( + fixed, + "url", + lambda x: x.replace("mofos.com", domain), + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "mofos", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=mofos) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=mofos) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=mofos + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=mofos) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search(name, search_domains=domains, postprocess=mofos) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=mofos) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/Mofos/Mofos.yml b/scrapers/Mofos/Mofos.yml new file mode 100644 index 0000000..dda83d3 --- /dev/null +++ b/scrapers/Mofos/Mofos.yml @@ -0,0 +1,76 @@ +name: Mofos +# requires: py_common, 
AyloAPI +# scrapes: Busted Babysitters, Don't Break Me, Drone Hunter, Girls Gone Pink, I Know That Girl, In Gang We Bang, Latina Sex Tapes, Let's Post It, Let's Try Anal, Milfs Like It Black, Mofos B Sides, MOFOS Lab, Mofos World Wide, Pervs On Patrol, Pornstar Vote, Project RV, Pubic Pickups, Real Slut Party, Share My BF, She's A Freak, Stranded Teens, The Sex Scout +sceneByURL: + - action: script + url: + - mofos.com/scene/ + - mofosnetwork.com/scene/ + - iknowthatgirl.com/scene/ + - publicagent.com/scene/ + - letstryanal.com/scene/ + script: + - python + - Mofos.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - Mofos.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - Mofos.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - Mofos.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - Mofos.py + - performer-by-name +performerByURL: + - action: script + url: + - mofos.com/model/ + - mofosnetwork.com/model/ + - iknowthatgirl.com/model/ + - publicagent.com/model/ + - letstryanal.com/model/ + script: + - python + - Mofos.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - Mofos.py + - performer-by-fragment +movieByURL: + - action: script + url: + - mofos.com/movie/ + - mofosnetwork.com/movie/ + - iknowthatgirl.com/movie/ + - publicagent.com/movie/ + - letstryanal.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - mofos.com/scene/ + - mofosnetwork.com/scene/ + - iknowthatgirl.com/scene/ + - publicagent.com/scene/ + - letstryanal.com/scene/ + script: + - python + - Mofos.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/PropertySex/PropertySex.py b/scrapers/PropertySex/PropertySex.py new file mode 100644 index 0000000..3bb910e --- /dev/null +++ b/scrapers/PropertySex/PropertySex.py @@ -0,0 +1,66 @@ +import json +import sys +from typing import Any 
+from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def propertysex(obj: Any, _) -> Any: + domain = None + match dig(obj, "studio", "name"): + case "House Humpers": + domain = "househumpers.com" + case _: + domain = "propertysex.com" + + fixed = replace_all( + obj, + "url", + lambda x: x.replace("propertysex.com", domain), + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "propertysex", + "househumpers", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=propertysex) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=propertysex) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=propertysex + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=propertysex) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=propertysex + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=propertysex) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/PropertySex/PropertySex.yml b/scrapers/PropertySex/PropertySex.yml new file mode 100644 index 0000000..a6854bf --- /dev/null +++ b/scrapers/PropertySex/PropertySex.yml @@ -0,0 +1,64 @@ +name: Property Sex +# requires: py_common, AyloAPI +# scrapes: House Humpers, Property Sex, Property Sex VR +sceneByURL: + - action: script + url: + - 
propertysex.com/scene/ + - househumpers.com/scene/ + script: + - python + - PropertySex.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - PropertySex.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - PropertySex.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - PropertySex.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - PropertySex.py + - performer-by-name +performerByURL: + - action: script + url: + - propertysex.com/model/ + - househumpers.com/model/ + script: + - python + - PropertySex.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - PropertySex.py + - performer-by-fragment +movieByURL: + - action: script + url: + - propertysex.com/movie/ + - househumpers.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - propertysex.com/scene/ + - househumpers.com/scene/ + script: + - python + - PropertySex.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/RealityDudes/RealityDudes.py b/scrapers/RealityDudes/RealityDudes.py new file mode 100644 index 0000000..ce5e5ee --- /dev/null +++ b/scrapers/RealityDudes/RealityDudes.py @@ -0,0 +1,67 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def realitydudes(obj: Any, _) -> Any: + domain = "realitydudes.com" + model = "/model/" + if dig(obj, "studio", "name") == "Papi": + domain = "papi.com" + model = "/pornstar/" + + fixed = replace_all( + obj, + "url", + lambda x: x.replace("realitydudes.com", domain).replace("/model/", model), + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "realitydudes", + "papi", + ] + op, 
args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=realitydudes) + case "scene-by-name", {"name": name} if name: + result = scene_search( + name, search_domains=domains, postprocess=realitydudes + ) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=realitydudes + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=realitydudes) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=realitydudes + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=realitydudes) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/RealityDudes/RealityDudes.yml b/scrapers/RealityDudes/RealityDudes.yml new file mode 100644 index 0000000..af6630d --- /dev/null +++ b/scrapers/RealityDudes/RealityDudes.yml @@ -0,0 +1,68 @@ +name: Reality Dudes +# requires: py_common, AyloAPI +# scrapes: Boys First Time, Dick Dorm, Reality Dudes, Str8 Chaser +sceneByURL: + - action: script + url: + - realitydudes.com/scene/ + - realitydudesnetwork.com/scene/ + - papi.com/scene/ + script: + - python + - RealityDudes.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - RealityDudes.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - RealityDudes.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - RealityDudes.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - RealityDudes.py + - performer-by-name +performerByURL: + - action: script + url: + - realitydudes.com/model/ + - 
realitydudesnetwork.com/model/ + - papi.com/pornstar/ + script: + - python + - RealityDudes.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - RealityDudes.py + - performer-by-fragment +movieByURL: + - action: script + url: + - realitydudes.com/movie/ + - realitydudesnetwork.com/movie/ + - papi.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - realitydudes.com/scene/ + - realitydudesnetwork.com/scene/ + - papi.com/scene/ + script: + - python + - RealityDudes.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/RealityKings/RealityKings.py b/scrapers/RealityKings/RealityKings.py new file mode 100644 index 0000000..4e1d566 --- /dev/null +++ b/scrapers/RealityKings/RealityKings.py @@ -0,0 +1,71 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all, replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "rks": "RK Shorts", +} + + +def rk(obj: Any, _) -> Any: + # Rename certain studios according to the map + fixed = replace_at( + obj, "studio", "name", replacement=lambda x: studio_map.get(x, x) + ) + + domain = None + match dig(fixed, "studio", "name"): + case "Look At Her Now": + domain = "lookathernow.com" + case _: + domain = "realitykings.com" + + fixed = replace_all( + fixed, + "url", + lambda x: x.replace("realitykings.com", domain), + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "realitykings", + "lookathernow", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=rk) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=rk) + case "scene-by-fragment" | 
"scene-by-query-fragment", args: + result = scene_from_fragment(args, search_domains=domains, postprocess=rk) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=rk) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search(name, search_domains=domains, postprocess=rk) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=rk) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/RealityKings/RealityKings.yml b/scrapers/RealityKings/RealityKings.yml new file mode 100644 index 0000000..66c4cf6 --- /dev/null +++ b/scrapers/RealityKings/RealityKings.yml @@ -0,0 +1,172 @@ +name: Mofos +# requires: py_common, AyloAPI +# scrapes: 40 Inch Plus, 8th Street Latinas, Bad Tow Truck, Big Naturals, Big Tits Boss, Captain Stabbin, CFNM Secret, Crazy Asian GFs, Crazy College GFs, Cum Fiesta, Dare Dorm, Euro Sex Parties, Extreme Asses, Extreme Naturals, First Time Auditions, GF Leaks, Girls of Naked, Happy Tugs, HD Love, Horny Birds, Hot Bush, Hot Girls Game, In the VIP, Lil Humpers, Look At Her Now, Mike in Brazil, Mike's Apartment, Milf Hunter, Milf Next Door, Moms Bang Teens, Moms Lick Teens, Money Talks, Monster Curves, No Faces, Pure 18, Reckless in Miami, RK Prime, RK Shorts, See My Wife, Sneaky Sex, Street BlowJobs, Teens Love Huge Cocks, We Live Together, Wives in Pantyhose, Work Me Harder +sceneByURL: + - action: script + url: + - 8thstreetlatinas.com/scene/ + - bignaturals.com/scene/ + - cumfiesta.com/scene/ + - daredorm.com/scene/ + - eurosexparties.com/scene/ + - gfleaks.com/scene/ + - happytugs.com/scene/ + - hdlove.com/scene/ + - hornybirds.com/scene/ + - hotgirlsgame.com/scene/ + - lilhumpers.com/scene/ + - lookathernow.com/scene/ + - mikeinbrazil.com/scene/ + - mikesapartment.com/scene/ + - milfhunter.com/scene/ + - 
momsbangteens.com/scene/ + - momslickteens.com/scene/ + - moneytalks.com/scene/ + - monstercurves.com/scene/ + - pure18.com/scene/ + - realitykings.com/scene/ + - realitykingsnetwork.com/scene/ + - recklessinmiami.com/scene/ + - rk.com/scene/ + - rkprime.com/scene/ + - sneakysex.com/scene/ + - teenslovehugecocks.com/scene/ + - welivetogether.com/scene/ + - workmeharder.com/scene/ + script: + - python + - RealityKings.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - RealityKings.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - RealityKings.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - RealityKings.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - RealityKings.py + - performer-by-name +performerByURL: + - action: script + url: + - 8thstreetlatinas.com/model/ + - bignaturals.com/model/ + - cumfiesta.com/model/ + - daredorm.com/model/ + - eurosexparties.com/model/ + - gfleaks.com/model/ + - happytugs.com/model/ + - hdlove.com/model/ + - hornybirds.com/model/ + - hotgirlsgame.com/model/ + - lilhumpers.com/model/ + - lookathernow.com/model/ + - mikeinbrazil.com/model/ + - mikesapartment.com/model/ + - milfhunter.com/model/ + - momsbangteens.com/model/ + - momslickteens.com/model/ + - moneytalks.com/model/ + - monstercurves.com/model/ + - pure18.com/model/ + - realitykings.com/model/ + - realitykingsnetwork.com/model/ + - recklessinmiami.com/model/ + - rk.com/model/ + - rkprime.com/model/ + - sneakysex.com/model/ + - teenslovehugecocks.com/model/ + - welivetogether.com/model/ + - workmeharder.com/model/ + script: + - python + - RealityKings.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - RealityKings.py + - performer-by-fragment +movieByURL: + - action: script + url: + - 8thstreetlatinas.com/movie/ + - bignaturals.com/movie/ + - cumfiesta.com/movie/ + - daredorm.com/movie/ + - 
eurosexparties.com/movie/ + - gfleaks.com/movie/ + - happytugs.com/movie/ + - hdlove.com/movie/ + - hornybirds.com/movie/ + - hotgirlsgame.com/movie/ + - lilhumpers.com/movie/ + - lookathernow.com/movie/ + - mikeinbrazil.com/movie/ + - mikesapartment.com/movie/ + - milfhunter.com/movie/ + - momsbangteens.com/movie/ + - momslickteens.com/movie/ + - moneytalks.com/movie/ + - monstercurves.com/movie/ + - pure18.com/movie/ + - realitykings.com/movie/ + - realitykingsnetwork.com/movie/ + - recklessinmiami.com/movie/ + - rk.com/movie/ + - rkprime.com/movie/ + - sneakysex.com/movie/ + - teenslovehugecocks.com/movie/ + - welivetogether.com/movie/ + - workmeharder.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - 8thstreetlatinas.com/scene/ + - bignaturals.com/scene/ + - cumfiesta.com/scene/ + - daredorm.com/scene/ + - eurosexparties.com/scene/ + - gfleaks.com/scene/ + - happytugs.com/scene/ + - hdlove.com/scene/ + - hornybirds.com/scene/ + - hotgirlsgame.com/scene/ + - lilhumpers.com/scene/ + - lookathernow.com/scene/ + - mikeinbrazil.com/scene/ + - mikesapartment.com/scene/ + - milfhunter.com/scene/ + - momsbangteens.com/scene/ + - momslickteens.com/scene/ + - moneytalks.com/scene/ + - monstercurves.com/scene/ + - pure18.com/scene/ + - realitykings.com/scene/ + - realitykingsnetwork.com/scene/ + - recklessinmiami.com/scene/ + - rk.com/scene/ + - rkprime.com/scene/ + - sneakysex.com/scene/ + - teenslovehugecocks.com/scene/ + - welivetogether.com/scene/ + - workmeharder.com/scene/ + script: + - python + - RealityKings.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/SeanCody/SeanCody.py b/scrapers/SeanCody/SeanCody.py new file mode 100644 index 0000000..0d78937 --- /dev/null +++ b/scrapers/SeanCody/SeanCody.py @@ -0,0 +1,53 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + 
scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def seancody(obj: Any, _) -> Any: + # Flatten all studios to just "Sean Cody" + return replace_at(obj, "studio", replacement=lambda _: {"name": "Sean Cody"}) + + +if __name__ == "__main__": + domains = [ + "seancody", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=seancody) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=seancody) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=seancody + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=seancody) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=seancody + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=seancody) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/SeanCody/SeanCody.yml b/scrapers/SeanCody/SeanCody.yml new file mode 100644 index 0000000..310d8ee --- /dev/null +++ b/scrapers/SeanCody/SeanCody.yml @@ -0,0 +1,60 @@ +name: Sean Cody +# requires: py_common, AyloAPI +# scrapes: Sean Cody +sceneByURL: + - action: script + url: + - seancody.com/scene/ + script: + - python + - SeanCody.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - SeanCody.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - SeanCody.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - SeanCody.py + - scene-by-query-fragment +performerByName: 
+ action: script + script: + - python + - SeanCody.py + - performer-by-name +performerByURL: + - action: script + url: + - seancody.com/model/ + script: + - python + - SeanCody.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - SeanCody.py + - performer-by-fragment +movieByURL: + - action: script + url: + - seancody.com/scene/ + # Since scenes link to the movie we can scrape movies from scenes + - seancody.com/movie/ + script: + - python + - SeanCody.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/SexyHub/SexyHub.py b/scrapers/SexyHub/SexyHub.py new file mode 100644 index 0000000..0c61219 --- /dev/null +++ b/scrapers/SexyHub/SexyHub.py @@ -0,0 +1,71 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def sexyhub(obj: Any, _) -> Any: + replacement = None + match dig(obj, "studio", "name"): + case "Dane Jones": + replacement = "danejones.com" + case "Lesbea": + replacement = "lesbea.com" + case _: + replacement = "sexyhub.com" + + # All SexyHub performer URLs use /modelprofile/ instead of the standard /model/ + # and some studios have their own domains + fixed = replace_all( + obj, + "url", + lambda x: x.replace("/model/", "/modelprofile/").replace( + "sexyhub.com", replacement + ), + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "sexyhub", + "danejones", + "lesbea", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=sexyhub) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=sexyhub) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result 
= scene_from_fragment( + args, search_domains=domains, postprocess=sexyhub + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=sexyhub) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search(name, search_domains=domains, postprocess=sexyhub) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=sexyhub) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/SexyHub/SexyHub.yml b/scrapers/SexyHub/SexyHub.yml new file mode 100644 index 0000000..40a0ac8 --- /dev/null +++ b/scrapers/SexyHub/SexyHub.yml @@ -0,0 +1,68 @@ +name: Sexy Hub +# requires: py_common, AyloAPI +# scrapes: Dane Jones, Fitness Rooms, Girlfriends, Lesbea, Massage Rooms, Mom XXX +sceneByURL: + - action: script + url: + - danejones.com/scene/ + - sexyhub.com/scene/ + - lesbea.com/scene/ + script: + - python + - SexyHub.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - SexyHub.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - SexyHub.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - SexyHub.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - SexyHub.py + - performer-by-name +performerByURL: + - action: script + url: + - danejones.com/modelprofile/ + - sexyhub.com/modelprofile/ + - lesbea.com/modelprofile/ + script: + - python + - SexyHub.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - SexyHub.py + - performer-by-fragment +movieByURL: + - action: script + url: + - danejones.com/movie/ + - sexyhub.com/movie/ + - lesbea.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - danejones.com/scene/ + - sexyhub.com/scene/ + - lesbea.com/scene/ + script: 
+ - python + - SexyHub.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/Squirted.yml b/scrapers/Squirted.yml new file mode 100644 index 0000000..c8ba73e --- /dev/null +++ b/scrapers/Squirted.yml @@ -0,0 +1,65 @@ +name: Squirted +# requires: py_common, AyloAPI +# scrapes: Squirted +sceneByURL: + - action: script + url: + - squirted.com/scene/ + script: + - python + - AyloAPI/scrape.py + - squirted + - scene-by-url +sceneByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - squirted + - scene-by-fragment +sceneByName: + action: script + script: + - python + - AyloAPI/scrape.py + - squirted + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - squirted + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - AyloAPI/scrape.py + - squirted + - performer-by-name +performerByURL: + - action: script + url: + - squirted.com/model/ + script: + - python + - AyloAPI/scrape.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - performer-by-fragment +movieByURL: + - action: script + url: + - squirted.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - squirted.com/scene/ + script: + - python + - AyloAPI/scrape.py + - movie-by-url +# Last Updated January 13, 2024 diff --git a/scrapers/TransAngels.yml b/scrapers/TransAngels.yml new file mode 100644 index 0000000..a04ab63 --- /dev/null +++ b/scrapers/TransAngels.yml @@ -0,0 +1,72 @@ +name: TransAngels +# requires: py_common, AyloAPI +# scrapes: TransAngels, TransHarder +sceneByURL: + - action: script + url: + - transangels.com/scene/ + - transharder.com/scene/ + script: + - python + - AyloAPI/scrape.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - transangels + - transharder + - scene-by-fragment +sceneByName: + action: script + script: + - python + - 
AyloAPI/scrape.py + - transangels + - transharder + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - transangels + - transharder + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - AyloAPI/scrape.py + - transangels + - transharder + - performer-by-name +performerByURL: + - action: script + url: + - transangels.com/model/ + - transharder.com/model/ + script: + - python + - AyloAPI/scrape.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - performer-by-fragment +movieByURL: + - action: script + url: + - transangels.com/movie/ + - transharder.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - transangels.com/scene/ + - transharder.com/scene/ + script: + - python + - AyloAPI/scrape.py + - movie-by-url +# Last Updated January 13, 2024 diff --git a/scrapers/TrueAmateurs.yml b/scrapers/TrueAmateurs.yml new file mode 100644 index 0000000..673a11f --- /dev/null +++ b/scrapers/TrueAmateurs.yml @@ -0,0 +1,64 @@ +name: True Amateurs +# requires: py_common, AyloAPI +# scrapes: True Amateurs +sceneByURL: + - action: script + url: + - trueamateurs.com/scene/ + script: + - python + - AyloAPI/scrape.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - trueamateurs + - scene-by-fragment +sceneByName: + action: script + script: + - python + - AyloAPI/scrape.py + - trueamateurs + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - trueamateurs + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - AyloAPI/scrape.py + - trueamateurs + - performer-by-name +performerByURL: + - action: script + url: + - trueamateurs.com/model/ + script: + - python + - AyloAPI/scrape.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - 
performer-by-fragment +movieByURL: + - action: script + url: + - trueamateurs.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - trueamateurs.com/scene/ + script: + - python + - AyloAPI/scrape.py + - movie-by-url +# Last Updated January 13, 2024 diff --git a/scrapers/Tube8Vip/Tube8Vip.py b/scrapers/Tube8Vip/Tube8Vip.py new file mode 100644 index 0000000..391a662 --- /dev/null +++ b/scrapers/Tube8Vip/Tube8Vip.py @@ -0,0 +1,58 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import replace_at, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def tube8vip(obj: Any, _) -> Any: + # comes back as weird studio name Premium with parent Elite + # so we flatten all studios to just "Tube8Vip" to match StashDB + fixed = replace_at(obj, "studio", replacement=lambda _: {"name": "Tube8Vip"}) + fixed = replace_all( + fixed, "url", replacement=lambda url: url.replace("elite.com", "tube8vip.com") + ) + return fixed + + +if __name__ == "__main__": + domains = [ + "tube8vip", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=tube8vip) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=tube8vip) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=tube8vip + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=tube8vip) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=tube8vip + ) + case "movie-by-url", 
{"url": url} if url: + result = movie_from_url(url, postprocess=tube8vip) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/Tube8Vip/Tube8Vip.yml b/scrapers/Tube8Vip/Tube8Vip.yml new file mode 100644 index 0000000..39afcff --- /dev/null +++ b/scrapers/Tube8Vip/Tube8Vip.yml @@ -0,0 +1,60 @@ +name: Tube8Vip +# requires: py_common, AyloAPI +# scrapes: Tube8Vip +sceneByURL: + - action: script + url: + - tube8vip/scene/ + script: + - python + - Tube8Vip.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - Tube8Vip.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - Tube8Vip.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - Tube8Vip.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - Tube8Vip.py + - performer-by-name +performerByURL: + - action: script + url: + - tube8vip/model/ + script: + - python + - Tube8Vip.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - Tube8Vip.py + - performer-by-fragment +movieByURL: + - action: script + url: + - tube8vip/scene/ + # Since scenes link to the movie we can scrape movies from scenes + - tube8vip/movie/ + script: + - python + - Tube8Vip.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/Twistys/Twistys.py b/scrapers/Twistys/Twistys.py new file mode 100644 index 0000000..b5e51fe --- /dev/null +++ b/scrapers/Twistys/Twistys.py @@ -0,0 +1,82 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "anettedawn": "Anette Dawn", + "twistys": "Twistys", + "TwistysHard": "Twistys Hard", + 
"whengirlsplay": "When Girls Play", +} + + +def twistys(obj: Any, raw: Any) -> Any: + fixed = replace_at( + obj, "studio", "name", replacement=lambda x: studio_map.get(x, x) + ) + + # These are not real studios in the API, so we need to fix them up + # if we find a better way to differentiate between these then this needs fixed + special_studios = { + "bf": "Blue Fantasies", + "bo": "Busty Ones", + "ef": "Euro Foxes", + } + # Scene can belong to multiple studios, we only grab the first one + studio_name = next( + ( + special_studios.get(c["shortName"]) + for c in dig(raw, "collections", default=[]) + if c["shortName"] in special_studios.keys() + ), + dig(fixed, "studio", "name"), + ) + return replace_at( + fixed, + "studio", + replacement=lambda _: {"name": studio_name, "parent": {"name": "Twistys"}}, + ) + + +if __name__ == "__main__": + domains = [ + "bangbros", + "virtualporn", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=twistys) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=twistys) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=twistys + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=twistys) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search(name, search_domains=domains, postprocess=twistys) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=twistys) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/Twistys/Twistys.yml b/scrapers/Twistys/Twistys.yml new file mode 100644 index 0000000..382102f --- /dev/null +++ 
b/scrapers/Twistys/Twistys.yml @@ -0,0 +1,64 @@ +name: Twistys +# requires: py_common, AyloAPI +# scrapes: Anette Dawn, Blue Fantasies, Busty Ones, Euro Foxes, Feature Film, Mom Knows Best, Nicole Graves, Turning Twistys, Twistys Hard, Twistys Teasers, When Girls Play +sceneByURL: + - action: script + url: + - twistys.com/scene/ + - twistysnetwork.com/scene/ + script: + - python + - Twistys.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - Twistys.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - Twistys.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - Twistys.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - Twistys.py + - performer-by-name +performerByURL: + - action: script + url: + - twistys.com/model/ + - twistysnetwork.com/model/ + script: + - python + - Twistys.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - Twistys.py + - performer-by-fragment +movieByURL: + - action: script + url: + - twistys.com/movie/ + - twistysnetwork.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - twistys.com/scene/ + - twistysnetwork.com/scene/ + script: + - python + - Twistys.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/WhyNotBi/WhyNotBi.py b/scrapers/WhyNotBi/WhyNotBi.py new file mode 100644 index 0000000..bda50f2 --- /dev/null +++ b/scrapers/WhyNotBi/WhyNotBi.py @@ -0,0 +1,54 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def whynotbi(obj: Any, _) -> Any: + # parent studio comes back from API as "WhyNotBy" + # so we flatten all studios to just "Why Not Bi" + return 
replace_at(obj, "studio", replacement=lambda _: {"name": "Why Not Bi"}) + + +if __name__ == "__main__": + domains = [ + "whynotbi", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=whynotbi) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=whynotbi) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=whynotbi + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=whynotbi) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=whynotbi + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=whynotbi) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/WhyNotBi/WhyNotBi.yml b/scrapers/WhyNotBi/WhyNotBi.yml new file mode 100644 index 0000000..010151d --- /dev/null +++ b/scrapers/WhyNotBi/WhyNotBi.yml @@ -0,0 +1,60 @@ +name: Bromo +# requires: py_common, AyloAPI +# scrapes: Why Not Bi +sceneByURL: + - action: script + url: + - whynotbi.com/scene/ + script: + - python + - WhyNotBi.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - WhyNotBi.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - WhyNotBi.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - WhyNotBi.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - WhyNotBi.py + - performer-by-name +performerByURL: + - action: script + url: + - whynotbi.com/model/ + script: + - python + - WhyNotBi.py + - performer-by-url 
+performerByFragment: + action: script + script: + - python + - WhyNotBi.py + - performer-by-fragment +movieByURL: + - action: script + url: + - whynotbi.com/scene/ + # Since scenes link to the movie we can scrape movies from scenes + - whynotbi.com/movie/ + script: + - python + - WhyNotBi.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/py_common/config.py b/scrapers/py_common/config.py new file mode 100644 index 0000000..8db265e --- /dev/null +++ b/scrapers/py_common/config.py @@ -0,0 +1,7 @@ +# An API Key can be generated in Stash's settings page ( Settings > Security > Authentication ) +STASH = { + "url": + "http://localhost:9999", + "api_key": + "" +} diff --git a/scrapers/py_common/graphql.py b/scrapers/py_common/graphql.py new file mode 100644 index 0000000..8aaac9e --- /dev/null +++ b/scrapers/py_common/graphql.py @@ -0,0 +1,1149 @@ +import sys + +try: + import requests +except ModuleNotFoundError: + print( + "You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", + file=sys.stderr, + ) + print( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", + file=sys.stderr, + ) + sys.exit() + +try: + import py_common.config as config + import py_common.log as log + from py_common.util import dig +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo! 
def callGraphQL(query: str, variables: dict | None = None):
    """POST a GraphQL query to the configured Stash server.

    :param query: GraphQL query document
    :param variables: optional variables object for the query
    :return: the response's "data" object on success, None on handled errors
    :raises ConnectionError: on unexpected HTTP status codes
    """
    api_key = config.STASH.get("api_key", "")
    url = config.STASH.get("url", "")
    if not url:
        log.error("You need to set the URL in 'config.py'")
        return None
    elif "stashdb.org" in url:
        log.error("You need to set the URL in 'config.py' to your own stash server")
        return None

    stash_url = config.STASH["url"] + "/graphql"
    headers = {
        "Accept-Encoding": "gzip, deflate",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Connection": "keep-alive",
        "DNT": "1",
        "ApiKey": api_key,
    }
    # renamed from `json` to avoid shadowing the conventional module name
    payload: dict = {"query": query}
    if variables is not None:
        payload["variables"] = variables
    response = requests.post(stash_url, json=payload, headers=headers)
    if response.status_code == 200:
        result = response.json()
        # BUGFIX: the GraphQL spec puts errors under the "errors" key (not
        # "error"), and each entry is an object — join their messages instead
        # of trying to join the dicts themselves (which would TypeError)
        if errors := result.get("errors"):
            messages = "\n".join(e.get("message", str(e)) for e in errors)
            log.error(f"[ERROR][GraphQL] {messages}")
            return None
        if result.get("data"):
            return result.get("data")
    elif response.status_code == 401:
        log.error(
            "[ERROR][GraphQL] HTTP Error 401, Unauthorised. You can add a API Key in 'config.py' in the 'py_common' folder"
        )
        return None
    elif response.status_code == 404:
        if "localhost:9999" in url:
            log.error(
                "[ERROR][GraphQL] HTTP Error 404, Not Found. Your local stash server is your endpoint, but port 9999 did not respond. Did you change stash's port? Edit 'config.py' in the 'py_common' folder to point at the correct port for stash!"
            )
        else:
            log.error(
                "[ERROR][GraphQL] HTTP Error 404, Not Found. Make sure 'config.py' in the 'py_common' folder points at the correct address and port!"
            )
        return None

    raise ConnectionError(
        f"GraphQL query failed: {response.status_code} - {response.content}"
    )
scanGeneratePreviews + scanGenerateImagePreviews + scanGenerateSprites + scanGeneratePhashes + scanGenerateThumbnails + } + identify { + sources { + source { + ...ScraperSourceData + } + options { + ...IdentifyMetadataOptionsData + } + } + options { + ...IdentifyMetadataOptionsData + } + } + autoTag { + performers + studios + tags + __typename + } + generate { + sprites + previews + imagePreviews + previewOptions { + previewSegments + previewSegmentDuration + previewExcludeStart + previewExcludeEnd + previewPreset + } + markers + markerImagePreviews + markerScreenshots + transcodes + phashes + } + deleteFile + deleteGenerated + } + fragment ScraperSourceData on ScraperSource { + stash_box_endpoint + scraper_id + } + fragment IdentifyMetadataOptionsData on IdentifyMetadataOptions { + fieldOptions { + ...IdentifyFieldOptionsData + } + setCoverImage + setOrganized + includeMalePerformers + } + fragment IdentifyFieldOptionsData on IdentifyFieldOptions { + field + strategy + createMissing + } + """ + result = callGraphQL(query) or {} + return dig(result, "configuration") + + +def getScene(scene_id: str | int) -> dict | None: + query = """ + query FindScene($id: ID!, $checksum: String) { + findScene(id: $id, checksum: $checksum) { + ...SceneData + } + } + fragment SceneData on Scene { + id + title + code + details + urls + date + rating100 + o_counter + organized + interactive + files { + path + size + duration + video_codec + audio_codec + width + height + frame_rate + bit_rate + } + paths { + screenshot + preview + stream + webp + vtt + sprite + funscript + } + scene_markers { + ...SceneMarkerData + } + galleries { + ...SlimGalleryData + } + studio { + ...SlimStudioData + } + movies { + movie { + ...MovieData + } + scene_index + } + tags { + ...SlimTagData + } + performers { + ...PerformerData + } + stash_ids { + endpoint + stash_id + } + } + fragment SceneMarkerData on SceneMarker { + id + title + seconds + stream + preview + screenshot + scene { + id + } + 
primary_tag { + id + name + aliases + } + tags { + id + name + aliases + } + } + fragment SlimGalleryData on Gallery { + id + title + date + urls + details + rating100 + organized + image_count + cover { + paths { + thumbnail + } + } + studio { + id + name + image_path + } + tags { + id + name + } + performers { + id + name + gender + favorite + image_path + } + scenes { + id + title + files { + path + basename + } + } + } + fragment SlimStudioData on Studio { + id + name + image_path + stash_ids { + endpoint + stash_id + } + parent_studio { + id + } + details + rating100 + aliases + } + fragment MovieData on Movie { + id + name + aliases + duration + date + rating100 + director + studio { + ...SlimStudioData + } + synopsis + url + front_image_path + back_image_path + scene_count + scenes { + id + title + files { + path + } + } + } + fragment SlimTagData on Tag { + id + name + aliases + image_path + } + fragment PerformerData on Performer { + id + name + url + gender + twitter + instagram + birthdate + ethnicity + country + eye_color + height_cm + measurements + fake_tits + career_length + tattoos + piercings + alias_list + favorite + image_path + scene_count + image_count + gallery_count + movie_count + tags { + ...SlimTagData + } + stash_ids { + stash_id + endpoint + } + rating100 + details + death_date + hair_color + weight + } + """ + variables = {"id": str(scene_id)} + result = callGraphQL(query, variables) or {} + return dig(result, "findScene") + + +def getSceneScreenshot(scene_id: str | int) -> str | None: + query = """ + query FindScene($id: ID!, $checksum: String) { + findScene(id: $id, checksum: $checksum) { + id + paths { + screenshot + } + } + } + """ + variables = {"id": str(scene_id)} + result = callGraphQL(query, variables) or {} + return dig(result, "findScene", "paths", "screenshot") + + +def getSceneByPerformerId(performer_id: str | int) -> dict | None: + query = """ +query FindScenes($filter: FindFilterType, $scene_filter: SceneFilterType, 
$scene_ids: [Int!]) { + findScenes(filter: $filter, scene_filter: $scene_filter, scene_ids: $scene_ids) { + count + filesize + duration + scenes { + ...SceneData + __typename + } + __typename + } + } + + fragment SceneData on Scene { + id + title + details + urls + date + rating100 + o_counter + organized + files { + path + size + duration + video_codec + audio_codec + width + height + frame_rate + bit_rate + __typename + } + interactive + interactive_speed + captions { + language_code + caption_type + __typename + } + created_at + updated_at + paths { + screenshot + preview + stream + webp + vtt + sprite + funscript + interactive_heatmap + caption + __typename + } + scene_markers { + ...SceneMarkerData + __typename + } + galleries { + ...SlimGalleryData + __typename + } + studio { + ...SlimStudioData + __typename + } + movies { + movie { + ...MovieData + __typename + } + scene_index + __typename + } + tags { + ...SlimTagData + __typename + } + performers { + ...PerformerData + __typename + } + stash_ids { + endpoint + stash_id + __typename + } + sceneStreams { + url + mime_type + label + __typename + } + __typename + } + + fragment SceneMarkerData on SceneMarker { + id + title + seconds + stream + preview + screenshot + scene { + id + __typename + } + primary_tag { + id + name + aliases + __typename + } + tags { + id + name + aliases + __typename + } + __typename + } + + fragment SlimGalleryData on Gallery { + id + title + date + urls + details + rating100 + organized + image_count + cover { + paths { + thumbnail + __typename + } + __typename + } + studio { + id + name + image_path + __typename + } + tags { + id + name + __typename + } + performers { + id + name + gender + favorite + image_path + __typename + } + scenes { + id + title + files { + path + } + __typename + } + __typename + } + + fragment SlimStudioData on Studio { + id + name + image_path + stash_ids { + endpoint + stash_id + __typename + } + parent_studio { + id + __typename + } + details + 
rating100 + aliases + __typename + } + + fragment MovieData on Movie { + id + name + aliases + duration + date + rating100 + director + studio { + ...SlimStudioData + __typename + } + synopsis + url + front_image_path + back_image_path + scene_count + scenes { + id + title + files { + path + } + __typename + } + __typename + } + + fragment SlimTagData on Tag { + id + name + aliases + image_path + __typename + } + + fragment PerformerData on Performer { + id + name + url + gender + twitter + instagram + birthdate + ethnicity + country + eye_color + height_cm + measurements + fake_tits + career_length + tattoos + piercings + alias_list + favorite + ignore_auto_tag + image_path + scene_count + image_count + gallery_count + movie_count + tags { + ...SlimTagData + __typename + } + stash_ids { + stash_id + endpoint + __typename + } + rating100 + details + death_date + hair_color + weight + __typename + } + """ + variables = { + "filter": {"page": 1, "per_page": 20, "sort": "title", "direction": "ASC"}, + "scene_filter": { + "performers": {"value": [str(performer_id)], "modifier": "INCLUDES_ALL"} + }, + } + result = callGraphQL(query, variables) or {} + return dig(result, "findScenes") + + +def getSceneIdByPerformerId(performer_id: str | int) -> dict | None: + query = """ + query FindScenes($filter: FindFilterType, $scene_filter: SceneFilterType, $scene_ids: [Int!]) { + findScenes(filter: $filter, scene_filter: $scene_filter, scene_ids: $scene_ids) { + scenes { + id + title + files { + path + } + paths { + screenshot + } + } + } + } + """ + variables = { + "filter": {"page": 1, "per_page": 20, "sort": "id", "direction": "DESC"}, + "scene_filter": { + "performers": {"value": [str(performer_id)], "modifier": "INCLUDES_ALL"} + }, + } + result = callGraphQL(query, variables) or {} + return dig(result, "findScenes") + + +def getPerformersByName(performer_name: str) -> dict | None: + query = """ + query FindPerformers($filter: FindFilterType, $performer_filter: 
PerformerFilterType) { + findPerformers(filter: $filter, performer_filter: $performer_filter) { + count + performers { + ...PerformerData + __typename + } + __typename + } + } + + fragment PerformerData on Performer { + id + name + url + gender + twitter + instagram + birthdate + ethnicity + country + eye_color + height_cm + measurements + fake_tits + career_length + tattoos + piercings + alias_list + favorite + ignore_auto_tag + image_path + scene_count + image_count + gallery_count + movie_count + tags { + ...SlimTagData + __typename + } + stash_ids { + stash_id + endpoint + __typename + } + rating100 + details + death_date + hair_color + weight + __typename + } + + fragment SlimTagData on Tag { + id + name + aliases + image_path + __typename + } + """ + + variables = { + "filter": { + "q": performer_name, + "page": 1, + "per_page": 20, + "sort": "name", + "direction": "ASC", + }, + "performer_filter": {}, + } + result = callGraphQL(query, variables) or {} + return dig(result, "findPerformers") + + +def getPerformersIdByName(performer_name: str) -> dict | None: + query = """ + query FindPerformers($filter: FindFilterType, $performer_filter: PerformerFilterType) { + findPerformers(filter: $filter, performer_filter: $performer_filter) { + count + performers { + ...PerformerData + } + } + } + + fragment PerformerData on Performer { + id + name + alias_list + } + """ + + variables = { + "filter": { + "q": performer_name, + "page": 1, + "per_page": 20, + "sort": "name", + "direction": "ASC", + }, + "performer_filter": {}, + } + + result = callGraphQL(query, variables) or {} + return dig(result, "findPerformers") + + +def getGallery(gallery_id: str | int) -> dict | None: + query = """ + query FindGallery($id: ID!) 
{ + findGallery(id: $id) { + ...GalleryData + } + } + fragment GalleryData on Gallery { + id + created_at + updated_at + title + date + urls + details + rating100 + organized + folder { + path + } + cover { + ...SlimImageData + } + studio { + ...SlimStudioData + } + tags { + ...SlimTagData + } + performers { + ...PerformerData + } + scenes { + ...SlimSceneData + } + } + fragment SlimImageData on Image { + id + title + rating100 + organized + o_counter + visual_files { + ... on ImageFile { + path + size + height + width + } + } + + paths { + thumbnail + image + } + + galleries { + id + files { + path + } + title + } + + studio { + id + name + image_path + } + + tags { + id + name + } + + performers { + id + name + gender + favorite + image_path + } + } + fragment SlimStudioData on Studio { + id + name + image_path + stash_ids { + endpoint + stash_id + } + parent_studio { + id + } + details + rating100 + aliases + } + fragment SlimTagData on Tag { + id + name + aliases + image_path + } + fragment PerformerData on Performer { + id + name + url + gender + twitter + instagram + birthdate + ethnicity + country + eye_color + height_cm + measurements + fake_tits + career_length + tattoos + piercings + alias_list + favorite + image_path + scene_count + image_count + gallery_count + movie_count + + tags { + ...SlimTagData + } + + stash_ids { + stash_id + endpoint + } + rating100 + details + death_date + hair_color + weight + } + fragment SlimSceneData on Scene { + id + title + code + details + urls + date + rating100 + o_counter + organized + interactive + + files { + path + size + duration + video_codec + audio_codec + width + height + frame_rate + bit_rate + } + + paths { + screenshot + preview + stream + webp + vtt + sprite + funscript + } + + scene_markers { + id + title + seconds + } + + galleries { + id + title + files { + path + } + } + + studio { + id + name + image_path + } + + movies { + movie { + id + name + front_image_path + } + scene_index + } + + tags { + id + 
name + } + + performers { + id + name + gender + favorite + image_path + } + + stash_ids { + endpoint + stash_id + } + } + """ + variables = {"id": gallery_id} + result = callGraphQL(query, variables) or {} + return dig(result, "findGallery") + + +def getGalleryPath(gallery_id: str | int) -> str | None: + query = """ + query FindGallery($id: ID!) { + findGallery(id: $id) { + folder { + path + } + files { + path + } + } + } + """ + variables = {"id": gallery_id} + result = callGraphQL(query, variables) or {} + # Galleries can either be a folder full of files or a zip file + return dig(result, "findGallery", "folder", "path") \ + or dig(result, "findGallery", "files", 0, "path") diff --git a/scrapers/py_common/log.py b/scrapers/py_common/log.py new file mode 100644 index 0000000..e013bb4 --- /dev/null +++ b/scrapers/py_common/log.py @@ -0,0 +1,39 @@ +import sys +import re +# Log messages sent from a script scraper instance are transmitted via stderr and are +# encoded with a prefix consisting of special character SOH, then the log +# level (one of t, d, i, w or e - corresponding to trace, debug, info, +# warning and error levels respectively), then special character +# STX. +# +# The log.trace, log.debug, log.info, log.warning, and log.error methods, and their equivalent +# formatted methods are intended for use by script scraper instances to transmit log +# messages. 
# Log helpers for script scrapers: each message is written to stderr with a
# special prefix — SOH, then a level character (t/d/i/w/e for trace, debug,
# info, warning, error), then STX — which Stash parses back into its own
# log levels.


def __log(level_char: bytes, s):
    # A falsy level byte means there is no level to tag the message with
    if not level_char:
        return
    prefix = "\x01{}\x02".format(level_char.decode())
    # Redact inline base64 image data so huge data URIs don't flood the log
    message = re.sub(r"data:image.+?;base64(.+?')", "[...]", str(s))
    # Each physical line gets its own prefixed stderr record
    for line in message.split("\n"):
        print(prefix, line, file=sys.stderr, flush=True)


def trace(s):
    __log(b"t", s)


def debug(s):
    __log(b"d", s)


def info(s):
    __log(b"i", s)


def warning(s):
    __log(b"w", s)


def error(s):
    __log(b"e", s)
measurements in centimeters, with optional cupsize for bust (e.g. 90-60-90, 90C-60-90)" + fake_tits: str + penis_length: str + circumcised: str + career_length: str + tattoos: str + piercings: str + aliases: str + "Must be comma-delimited in order to be parsed correctly" + tags: list[ScrapedTag] + image: str + images: list[str] + "Images can be URLs or base64-encoded images" + details: str + +class ScrapedStudio(TypedDict, total=False): + name: Required[str] + "Name is the only required field" + url: str + parent: 'ScrapedStudio' + image: str + +class ScrapedMovie(TypedDict, total=False): + name: Required[str] + date: str + "Must be in the format YYYY-MM-DD" + duration: str + "Duration in seconds" + director: str + synopsis: str + studio: ScrapedStudio + rating: str + front_image: str + back_image: str + url: str + aliases: str + +class ScrapedGallery(TypedDict, total=False): + title: Required[str] + details: str + url: str + urls: list[str] + date: str + "Must be in the format YYYY-MM-DD" + studio: ScrapedStudio + tags: list[ScrapedTag] + performers: list[ScrapedPerformer] + +class ScrapedScene(TypedDict, total=False): + title: str + details: str + url: str + urls: list[str] + date: str + image: str + studio: ScrapedStudio + movies: list[ScrapedMovie] + tags: list[ScrapedTag] + performers: list[ScrapedPerformer] + code: str + director: str + +# Technically we can return a full ScrapedPerformer but the current UI only +# shows the name. The URL is absolutely necesserary for the result to be used +# in the next step: actually scraping the performer +class PerformerSearchResult(TypedDict): + name: str + url: str + +# Technically we can return a full ScrapedScene but the current UI only +# shows the name, image, studio, tags and performers. 
The URL is absolutely +# necesserary for the result to be used in the next step: actually scraping the scene +class SceneSearchResult(TypedDict, total=False): + title: Required[str] + url: Required[str] + date: str + "Must be in the format YYYY-MM-DD" + image: str + "Image can be a URL or base64-encoded image" + tags: list[ScrapedTag] + performers: list[ScrapedPerformer] + studio: ScrapedStudio diff --git a/scrapers/py_common/util.py b/scrapers/py_common/util.py new file mode 100644 index 0000000..9022fa7 --- /dev/null +++ b/scrapers/py_common/util.py @@ -0,0 +1,258 @@ +from argparse import ArgumentParser +from functools import reduce +from typing import Any, Callable, TypeVar +from urllib.error import URLError +from urllib.request import Request, urlopen +import json +import sys + + +def dig(c: dict | list, *keys: str | int | tuple[str | int, ...], default=None) -> Any: + """ + Helper function to get a value from a nested dict or list + + If a key is a tuple the items will be tried in order until a value is found + + :param c: dict or list to search + :param keys: keys to search for + :param default: default value to return if not found + :return: value if found, None otherwise + + >>> obj = {"a": {"b": ["c", "d"], "f": {"g": "h"}}} + >>> dig(obj, "a", "b", 1) + 'd' + >>> dig(obj, "a", ("e", "f"), "g") + 'h' + """ + + def inner(d: dict | list, key: str | int | tuple): + if isinstance(d, dict): + if isinstance(key, tuple): + for k in key: + if k in d: + return d[k] + return d.get(key) + elif isinstance(d, list) and isinstance(key, int) and key < len(d): + return d[key] + else: + return default + + return reduce(inner, keys, c) # type: ignore + + +T = TypeVar("T") + + +def replace_all(obj: dict, key: str, replacement: Callable[[T], T]) -> dict: + """ + Helper function to recursively replace values in a nested dict, returning a new dict + + If the key refers to a list the replacement function will be called for each item + + :param obj: dict to search + :param key: 
key to search for + :param replacement: function called on the value to replace it + :return: new dict + + >>> obj = {"a": {"b": ["c", "d"], "f": {"g": "h"}}} + >>> replace(obj, "g", lambda x: x.upper()) # Replace a single item + {'a': {'b': ['c', 'd'], 'f': {'g': 'H'}}} + >>> replace(obj, "b", lambda x: x.upper()) # Replace all items in a list + {'a': {'b': ['C', 'D'], 'f': {'g': 'h'}}} + >>> replace(obj, "z", lambda x: x.upper()) # Do nothing if the key is not found + {'a': {'b': ['c', 'd'], 'f': {'g': 'h'}}} + """ + if not isinstance(obj, dict): + return obj + + new = {} + for k, v in obj.items(): + if k == key: + if isinstance(v, list): + new[k] = [replacement(x) for x in v] + else: + new[k] = replacement(v) + elif isinstance(v, dict): + new[k] = replace_all(v, key, replacement) + elif isinstance(v, list): + new[k] = [replace_all(x, key, replacement) for x in v] + else: + new[k] = v + return new + + +def replace_at(obj: dict, *path: str, replacement: Callable[[T], T]) -> dict: + """ + Helper function to replace a value at a given path in a nested dict, returning a new dict + + If the path refers to a list the replacement function will be called for each item + + If the path does not exist, the replacement function will not be called and the dict will be returned as-is + + :param obj: dict to search + :param path: path to search for + :param replacement: function called on the value to replace it + :return: new dict + + >>> obj = {"a": {"b": ["c", "d"], "f": {"g": "h"}}} + >>> replace_at(obj, "a", "f", "g", replacement=lambda x: x.upper()) # Replace a single item + {'a': {'b': ['c', 'd'], 'f': {'g': 'H'}}} + >>> replace_at(obj, "a", "b", replacement=lambda x: x.upper()) # Replace all items in a list + {'a': {'b': ['C', 'D'], 'f': {'g': 'h'}}} + >>> replace_at(obj, "a", "z", "g", replacement=lambda x: x.upper()) # Broken path, do nothing + {'a': {'b': ['c', 'd'], 'f': {'g': 'h'}}} + """ + + def inner(d: dict, *keys: str): + match keys: + case [k] if isinstance(d, 
dict) and k in d: + if isinstance(d[k], list): + return {**d, k: [replacement(x) for x in d[k]]} + return {**d, k: replacement(d[k])} + case [k, *ks] if isinstance(d, dict) and k in d: + return {**d, k: inner(d[k], *ks)} + case _: + return d + + return inner(obj, *path) # type: ignore + + +def is_valid_url(url): + """ + Checks if an URL is valid by making a HEAD request and ensuring the response code is 2xx + """ + try: + req = Request(url, method="HEAD") + with urlopen(req) as response: + return 200 <= response.getcode() < 300 + except URLError: + return False + + +def __default_parser(**kwargs): + parser = ArgumentParser(**kwargs) + # Some scrapers can take extra arguments so we can + # do rudimentary configuration in the YAML file + parser.add_argument("extra", nargs="*") + subparsers = parser.add_subparsers(dest="operation", required=True) + + # "Scrape with..." and the subsequent search box + subparsers.add_parser( + "performer-by-name", help="Search for performers" + ).add_argument("--name", help="Performer name to search for") + + # The results of performer-by-name will be passed to this + pbf = subparsers.add_parser("performer-by-fragment", help="Scrape a performer") + # Technically there's more information in this fragment, + # but in 99.9% of cases we only need the URL or the name + pbf.add_argument("--url", help="Scene URL") + pbf.add_argument("--name", help="Performer name to search for") + + # Filling in an URL and hitting the "Scrape" icon + subparsers.add_parser( + "performer-by-url", help="Scrape a performer by their URL" + ).add_argument("--url") + + # Filling in an URL and hitting the "Scrape" icon + subparsers.add_parser( + "movie-by-url", help="Scrape a movie by its URL" + ).add_argument("--url") + + # The looking glass search icon + # name field is guaranteed to be filled by Stash + subparsers.add_parser("scene-by-name", help="Scrape a scene by name").add_argument( + "--name", help="Name to search for" + ) + + # Filling in an URL and hitting 
the "Scrape" icon + subparsers.add_parser( + "scene-by-url", help="Scrape a scene by its URL" + ).add_argument("--url") + + # "Scrape with..." + sbf = subparsers.add_parser("scene-by-fragment", help="Scrape a scene") + sbf.add_argument("-u", "--url") + sbf.add_argument("--id") + sbf.add_argument("--title") # Title will be filename if not set in Stash + sbf.add_argument("--date") + sbf.add_argument("--details") + sbf.add_argument("--urls", nargs="+") + + # Tagger view or search box + sbqf = subparsers.add_parser("scene-by-query-fragment", help="Scrape a scene") + sbqf.add_argument("-u", "--url") + sbqf.add_argument("--id") + sbqf.add_argument("--title") # Title will be filename if not set in Stash + sbqf.add_argument("--code") + sbqf.add_argument("--details") + sbqf.add_argument("--director") + sbqf.add_argument("--date") + sbqf.add_argument("--urls", nargs="+") + + # Filling in an URL and hitting the "Scrape" icon + subparsers.add_parser( + "gallery-by-url", help="Scrape a gallery by its URL" + ).add_argument("--url", help="Gallery URL") + + # "Scrape with..." 
+ gbf = subparsers.add_parser("gallery-by-fragment", help="Scrape a gallery") + gbf.add_argument("-u", "--url") + gbf.add_argument("--id") + gbf.add_argument("--title") + gbf.add_argument("--date") + gbf.add_argument("--details") + gbf.add_argument("--urls", nargs="+") + + return parser + + +def scraper_args(**kwargs): + """ + Helper function to parse arguments for a scraper + + This allows scrapers to be called from the command line without + piping JSON to stdin but also from Stash + + Returns a tuple of the operation and the parsed arguments: operation is one of + - performer-by-name + - performer-by-fragment + - performer-by-url + - movie-by-url + - scene-by-name + - scene-by-url + - scene-by-fragment + - scene-by-query-fragment + - gallery-by-url + - gallery-by-fragment + + A scraper can be configured to take extra arguments by adding them to the YAML file: + ```yaml + sceneByName: + action: script + script: + - python + - my-scraper.py + - extra + - args + - scene-by-name + ``` + + When called from Stash through the above configuration this function would return: + ```python + ("scene-by-name", {"extra": ["extra", "args"], "name": "scene name"}) + ``` + """ + + parser = __default_parser(**kwargs) + args = vars(parser.parse_args()) + + # If stdin is not connected to a TTY the script is being executed by Stash + if not sys.stdin.isatty(): + try: + stash_fragment = json.load(sys.stdin) + args.update(stash_fragment) + except json.decoder.JSONDecodeError: + # This would only happen if Stash passed invalid JSON + sys.exit(69) + + return args.pop("operation"), args