This repository has been archived by the owner on Apr 12, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcrowddev.py
61 lines (50 loc) · 1.88 KB
/
crowddev.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import hashlib
import os
from datetime import datetime, timedelta

import dotenv
import requests
from tqdm import tqdm

from apis.openai import generate_embeddings
from apis.qdrant import upsert
# Load variables from the local ".env" file before anything else runs; the
# functions below read CROWDDEV_TENANT_ID and CROWDDEV_API_KEY from os.environ.
dotenv.load_dotenv(".env")
def get_activities(lastTimestamp, offset, limit=200, timeout=30):
    """Fetch one page of activities from the crowd.dev activity query API.

    Args:
        lastTimestamp: Lower bound sent as the ``createdAt`` ``gte`` filter.
        offset: Number of rows to skip, sent as the query ``offset``.
        limit: Page size sent as the query ``limit`` (defaults to 200, the
            value that was previously hard-coded).
        timeout: Seconds to wait for the HTTP call before giving up.

    Returns:
        The value stored under the ``rows`` key of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    tenant_id = os.environ.get("CROWDDEV_TENANT_ID")
    api_key = os.environ.get("CROWDDEV_API_KEY")

    url = f"https://app.crowd.dev/api/tenant/{tenant_id}/activity/query"
    payload = {
        "limit": limit,
        "offset": offset,
        "filter": {
            "createdAt": {"gte": lastTimestamp},
            # Fixed floor on the activity timestamp, applied on every query.
            "timestamp": {"gte": "2022-09-01"},
        },
        "orderBy": "timestamp_DESC",
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {api_key}",
    }

    # Without a timeout, requests waits forever on a stalled connection.
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    # Surface auth/tenant/server errors as HTTPError instead of a confusing
    # KeyError on "rows" further down.
    response.raise_for_status()
    return response.json()["rows"]
def _stable_point_id(activity_id):
    """Map an activity id to a 63-bit integer that is identical across runs."""
    digest = hashlib.sha256(str(activity_id).encode("utf-8")).digest()
    # Drop one bit so the value stays in the same non-negative range
    # (0 .. 2**63 - 1) that abs(hash(...)) used to produce.
    return int.from_bytes(digest[:8], "big") >> 1


def embed_activities(lastTimestamp, start_offset=0):
    """Embed activities page by page and upsert each one with its payload.

    Pages are pulled with ``get_activities`` until an empty page comes back.
    For every row, the title and body are joined with a newline, passed to
    ``generate_embeddings``, and, when an embedding is returned, stored via
    ``upsert`` (imported from ``apis.qdrant``) together with a payload built
    from the row.

    Args:
        lastTimestamp: Forwarded to ``get_activities`` as the ``createdAt``
            lower bound.
        start_offset: Offset of the first page to fetch (defaults to 0).
    """
    offset = start_offset
    rows = get_activities(lastTimestamp, offset)
    while rows:
        for row in tqdm(rows):
            payload = {
                "url": row["url"],
                "timestamp": row["createdAt"],
                "title": row["title"],
                "body": row["body"],
                "platform": row["platform"],
                "member": row["member"]["displayName"],
                # Everything except the above
                "attributes": row["attributes"],
            }
            # A None title or body would make the string concatenation raise
            # TypeError; treat it as empty text. The stored payload keeps the
            # original values.
            text = (payload["title"] or "") + "\n" + (payload["body"] or "")
            embed = generate_embeddings(text)
            if not embed:
                continue
            # The built-in hash() of a str is salted per interpreter process,
            # so it yields a different id for the same activity on every run
            # and re-runs insert duplicates instead of overwriting. A digest
            # of the id is stable.
            upsert([_stable_point_id(row["id"])], [payload], [embed])
        # Advance by what was actually returned so pagination does not depend
        # on the page size used by get_activities.
        offset += len(rows)
        rows = get_activities(lastTimestamp, offset)
# Sync window starts one day back, formatted as YYYY-MM-DD.
# For a wider backfill, use a larger delta here (e.g. timedelta(days=17)).
DATE = (datetime.now() - timedelta(days=1)).date().isoformat()

# Runs the sync from the first page.
embed_activities(DATE, start_offset=0)