-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.ts
42 lines (30 loc) · 1.42 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import { client } from "./weaviateClient/Client";
import { addSchema, deleteSchema } from "./weaviateClient/Schema/CrawledData";
import { runQuery } from "./weaviateClient/Query/query";
import { loadData } from "./weaviateClient/Index/indexVectorDB";
import { crawlWebsite } from "./webCrawler";
// Ideally these functions should be hosted on a server and different functions should be exposed as a different APIs.
async function main(): Promise<void>{
const websiteName = "https://quotes.toscrape.com/"; // Input 1 Website Name
//Call crawler here and push the data to loadData
//This crawler can't get by pass captchas and human verification to visit the actual website so use simple website only.
// This can crawl dynamic elements as well.
const crawledData = await crawlWebsite(websiteName)
await addSchema(); // This function should be run once only.
await loadData(crawledData);
let keyword = "scientist" // Input 2 This keyword can be anything you would like to search upon crawled data set max 1 - 2 word.
// This function will print the closest 3 data which webCrawler has indexed for the above website.
await runQuery(keyword);
}
main();
//deleteSchema()
// Print all the elements in the DB This is for logging purpose
client.schema.
getter().
do().
then(res => {
console.log(JSON.stringify(res));
}).
catch(err => {
console.log(err);
});