Skip to content

Commit

Permalink
faster (parallel) crawling
Browse files Browse the repository at this point in the history
including rate limiting
  • Loading branch information
mrkvon committed Jul 21, 2021
1 parent 84f2f05 commit 38cd187
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 48 deletions.
1 change: 1 addition & 0 deletions .eslintrc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ rules:
semi:
- error
- never
no-debugger: warn
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
- [ ] show clearly what are the directions of :knows
- [x] show also who knows this person
- [ ] login for different pod providers
- [ ] faster (parallel) crawling
- [x] faster (parallel) crawling
- [ ] search people
- [ ] highlight also people who know the person
- [ ] highlight people whose button is crawled in PersonList
- [ ] add custom starting point for crawling
- [ ] support extended profile (seeAlso, sameAs)
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"d3-selection": "^3.0.0",
"d3-zoom": "^3.0.0",
"eslint-plugin-react-hooks": "^4.2.0",
"limiter": "^2.1.0",
"node-sass": "^6.0.1",
"numeric": "^1.2.6",
"rdf-namespaces": "^1.9.2",
Expand Down
2 changes: 1 addition & 1 deletion src/components/PeopleList.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ interface Props {

export const PeopleList = ({ people, ...props }: Props) => (
<div {...props}>
<div>{Object.keys(people).length}</div>
<div>total: {Object.keys(people).length}</div>
<div>
pending:{' '}
{
Expand Down
106 changes: 60 additions & 46 deletions src/data/BFSFriends.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,22 @@ import {
} from '@inrupt/solid-client'
import { foaf, vcard } from 'rdf-namespaces'
import { Person } from '../components/DataContainer'
import { RateLimiter } from 'limiter'

// Shared rate limiter for outgoing requests made through `limitedFetch`.
// Grants 1 token per interval of 50.
// NOTE(review): the `limiter` package treats a numeric interval as
// milliseconds (i.e. roughly 20 requests per second) — confirm against its docs.
const limiter = new RateLimiter({ tokensPerInterval: 1, interval: 50 })

/**
 * Drop-in replacement for the global `fetch` with the same call signature.
 *
 * Every call first waits until the shared `limiter` hands out one token and
 * only then forwards its arguments, unchanged, to `fetch`.
 */
const limitedFetch: typeof fetch = async (...fetchArgs) => {
  // Wait for our turn: resolves once a single token could be removed.
  await limiter.removeTokens(1)

  const response = await fetch(...fetchArgs)
  return response
}

export const findFriends = async (
webId: IriString,
): Promise<{ name: string; friends: IriString[]; photo: string }> => {
if (webId) {
const dataset = await getSolidDataset(getResourceUrl(webId), { fetch })
const dataset = await getSolidDataset(getResourceUrl(webId), {
fetch: limitedFetch,
})
const person = getThing(dataset, webId)
if (person) {
const friends = getTermAll(person, foaf.knows).map(a => a.value)
Expand Down Expand Up @@ -47,56 +57,60 @@ export const BFSFriends = (
.map(person => person.status)
.includes('pending')
) {
// take a first unvisited person
const unvisitedPerson = Object.values(people).find(
// take all unvisited persons
const unvisitedPersons = Object.values(people).filter(
person => person.status === 'pending',
)
if (unvisitedPerson) {
// fetch their friends
try {
const { name, photo, friends } = await findFriends(
unvisitedPerson.uri,
)
const unvisited: Person = {
...unvisitedPerson,
status: 'success',
knows: new Set(friends.map(uri => fixUri(uri))),
name,
photo,
}
if (unvisitedPersons.length > 0) {
await Promise.all(
unvisitedPersons.map(async unvisitedPerson => {
// fetch their friends
try {
const { name, photo, friends } = await findFriends(
unvisitedPerson.uri,
)
const unvisited: Person = {
...unvisitedPerson,
status: 'success',
knows: new Set(friends.map(uri => fixUri(uri))),
name,
photo,
}

// add their friends
const newlyFoundFriends = Object.fromEntries(
friends
.map(
uri =>
[
fixUri(uri),
{
status: 'pending',
knows: new Set(),
name: '',
uri: fixUri(uri),
} as Person,
] as [IriString, Person],
// add their friends
const newlyFoundFriends = Object.fromEntries(
friends
.map(
uri =>
[
fixUri(uri),
{
status: 'pending',
knows: new Set(),
name: '',
uri: fixUri(uri),
} as Person,
] as [IriString, Person],
)
.filter(([uri]) => !Object.keys(people).includes(uri)),
)
.filter(([uri]) => !Object.keys(people).includes(uri)),
)

people = {
...people,
...newlyFoundFriends,
[unvisited.uri]: unvisited,
}
} catch (e) {
// set their status to error
people = {
...people,
[unvisitedPerson.uri]: { ...unvisitedPerson, status: 'error' },
}
} finally {
onChange({ ...people })
}
people = {
...people,
...newlyFoundFriends,
[unvisited.uri]: unvisited,
}
} catch (e) {
// set their status to error
people = {
...people,
[unvisitedPerson.uri]: { ...unvisitedPerson, status: 'error' },
}
} finally {
onChange({ ...people })
}
}),
)
} else break
}
})()
Expand Down
12 changes: 12 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -7585,6 +7585,11 @@ jsprim@^1.2.2:
array-includes "^3.1.2"
object.assign "^4.1.2"

[email protected]:
version "4.3.0"
resolved "https://registry.yarnpkg.com/just-performance/-/just-performance-4.3.0.tgz#cc2bc8c9227f09e97b6b1df4cd0de2df7ae16db1"
integrity sha512-L7RjvtJsL0QO8xFs5wEoDDzzJwoiowRw6Rn/GnvldlchS2JQr9wFYPiwZcDfrbbujEKqKN0tvENdbjXdYhDp5Q==

killable@^1.0.1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/killable/-/killable-1.0.1.tgz#4c8ce441187a061c7474fb87ca08e2a638194892"
Expand Down Expand Up @@ -7665,6 +7670,13 @@ levn@~0.3.0:
prelude-ls "~1.1.2"
type-check "~0.3.2"

limiter@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/limiter/-/limiter-2.1.0.tgz#d38d7c5b63729bb84fb0c4d8594b7e955a5182a2"
integrity sha512-361TYz6iay6n+9KvUUImqdLuFigK+K79qrUtBsXhJTLdH4rIt/r1y8r1iozwh8KbZNpujbFTSh74mJ7bwbAMOw==
dependencies:
just-performance "4.3.0"

lines-and-columns@^1.1.6:
version "1.1.6"
resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.1.6.tgz#1c00c743b433cd0a4e80758f7b64a57440d9ff00"
Expand Down

0 comments on commit 38cd187

Please sign in to comment.