From 38cd1876801d857dad93cb34d720dde008aa46fb Mon Sep 17 00:00:00 2001 From: mrkvon Date: Wed, 21 Jul 2021 14:20:50 +0200 Subject: [PATCH] faster (parallel) crawling including rate limiting --- .eslintrc.yml | 1 + README.md | 3 +- package.json | 1 + src/components/PeopleList.tsx | 2 +- src/data/BFSFriends.ts | 106 +++++++++++++++++++--------------- yarn.lock | 12 ++++ 6 files changed, 77 insertions(+), 48 deletions(-) diff --git a/.eslintrc.yml b/.eslintrc.yml index 1b46da9..7363843 100644 --- a/.eslintrc.yml +++ b/.eslintrc.yml @@ -32,3 +32,4 @@ rules: semi: - error - never + no-debugger: warn diff --git a/README.md b/README.md index c64a0c7..607ade1 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,9 @@ - [ ] show clearly what are the directions of :knows - [x] show also who knows this person - [ ] login for different pod providers -- [ ] faster (parallel) crawling +- [x] faster (parallel) crawling - [ ] search people - [ ] highlight also people who know the person - [ ] highlight people whose button is crawled in PersonList - [ ] add custom starting point for crawling +- [ ] support extended profile (seeAlso, sameAs) diff --git a/package.json b/package.json index aaf99c0..607cd0a 100644 --- a/package.json +++ b/package.json @@ -28,6 +28,7 @@ "d3-selection": "^3.0.0", "d3-zoom": "^3.0.0", "eslint-plugin-react-hooks": "^4.2.0", + "limiter": "^2.1.0", "node-sass": "^6.0.1", "numeric": "^1.2.6", "rdf-namespaces": "^1.9.2", diff --git a/src/components/PeopleList.tsx b/src/components/PeopleList.tsx index 4b40bc9..7859ac1 100644 --- a/src/components/PeopleList.tsx +++ b/src/components/PeopleList.tsx @@ -7,7 +7,7 @@ interface Props { export const PeopleList = ({ people, ...props }: Props) => (
-
{Object.keys(people).length}
+
total: {Object.keys(people).length}
pending:{' '} { diff --git a/src/data/BFSFriends.ts b/src/data/BFSFriends.ts index e81518a..acb1c57 100644 --- a/src/data/BFSFriends.ts +++ b/src/data/BFSFriends.ts @@ -8,12 +8,22 @@ import { } from '@inrupt/solid-client' import { foaf, vcard } from 'rdf-namespaces' import { Person } from '../components/DataContainer' +import { RateLimiter } from 'limiter' + +const limiter = new RateLimiter({ tokensPerInterval: 1, interval: 50 }) + +const limitedFetch: typeof fetch = async (...props) => { + await limiter.removeTokens(1) + return await fetch(...props) +} export const findFriends = async ( webId: IriString, ): Promise<{ name: string; friends: IriString[]; photo: string }> => { if (webId) { - const dataset = await getSolidDataset(getResourceUrl(webId), { fetch }) + const dataset = await getSolidDataset(getResourceUrl(webId), { + fetch: limitedFetch, + }) const person = getThing(dataset, webId) if (person) { const friends = getTermAll(person, foaf.knows).map(a => a.value) @@ -47,56 +57,60 @@ export const BFSFriends = ( .map(person => person.status) .includes('pending') ) { - // take a first unvisited person - const unvisitedPerson = Object.values(people).find( + // take all unvisited persons + const unvisitedPersons = Object.values(people).filter( person => person.status === 'pending', ) - if (unvisitedPerson) { - // fetch their friends - try { - const { name, photo, friends } = await findFriends( - unvisitedPerson.uri, - ) - const unvisited: Person = { - ...unvisitedPerson, - status: 'success', - knows: new Set(friends.map(uri => fixUri(uri))), - name, - photo, - } + if (unvisitedPersons.length > 0) { + await Promise.all( + unvisitedPersons.map(async unvisitedPerson => { + // fetch their friends + try { + const { name, photo, friends } = await findFriends( + unvisitedPerson.uri, + ) + const unvisited: Person = { + ...unvisitedPerson, + status: 'success', + knows: new Set(friends.map(uri => fixUri(uri))), + name, + photo, + } - // add their friends - const newlyFoundFriends = Object.fromEntries( - friends - .map( - uri => - [ - fixUri(uri), - { - status: 'pending', - knows: new Set(), - name: '', - uri: fixUri(uri), - } as Person, - ] as [IriString, Person], + // add their friends + const newlyFoundFriends = Object.fromEntries( + friends + .map( + uri => + [ + fixUri(uri), + { + status: 'pending', + knows: new Set(), + name: '', + uri: fixUri(uri), + } as Person, + ] as [IriString, Person], + ) + .filter(([uri]) => !Object.keys(people).includes(uri)), ) - .filter(([uri]) => !Object.keys(people).includes(uri)), - ) - people = { - ...people, - ...newlyFoundFriends, - [unvisited.uri]: unvisited, - } - } catch (e) { - // set their status to error - people = { - ...people, - [unvisitedPerson.uri]: { ...unvisitedPerson, status: 'error' }, - } - } finally { - onChange({ ...people }) - } + people = { + ...people, + ...newlyFoundFriends, + [unvisited.uri]: unvisited, + } + } catch (e) { + // set their status to error + people = { + ...people, + [unvisitedPerson.uri]: { ...unvisitedPerson, status: 'error' }, + } + } finally { + onChange({ ...people }) + } + }), + ) } else break } })() diff --git a/yarn.lock b/yarn.lock index 45577f3..87ca864 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7585,6 +7585,11 @@ jsprim@^1.2.2: array-includes "^3.1.2" object.assign "^4.1.2" +just-performance@4.3.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/just-performance/-/just-performance-4.3.0.tgz#cc2bc8c9227f09e97b6b1df4cd0de2df7ae16db1" + integrity sha512-L7RjvtJsL0QO8xFs5wEoDDzzJwoiowRw6Rn/GnvldlchS2JQr9wFYPiwZcDfrbbujEKqKN0tvENdbjXdYhDp5Q== + killable@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/killable/-/killable-1.0.1.tgz#4c8ce441187a061c7474fb87ca08e2a638194892" @@ -7665,6 +7670,13 @@ levn@~0.3.0: prelude-ls "~1.1.2" type-check "~0.3.2" +limiter@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/limiter/-/limiter-2.1.0.tgz#d38d7c5b63729bb84fb0c4d8594b7e955a5182a2" + integrity sha512-361TYz6iay6n+9KvUUImqdLuFigK+K79qrUtBsXhJTLdH4rIt/r1y8r1iozwh8KbZNpujbFTSh74mJ7bwbAMOw== + dependencies: + just-performance "4.3.0" + lines-and-columns@^1.1.6: version "1.1.6" resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.1.6.tgz#1c00c743b433cd0a4e80758f7b64a57440d9ff00"