#!/usr/bin/env node
import Anthropic from "@anthropic-ai/sdk";
import cliProgress from 'cli-progress';
import { program } from 'commander';
import dotenv from 'dotenv';
import fs from 'fs';
import inquirer from "inquirer";
import StreamZip from 'node-stream-zip';
import os from 'os';
import path from 'path';
import util from 'util';
import LlamaService from './LlamaService.js';
dotenv.config();
const MAX_RETRIES = parseInt(process.env.MAX_RETRIES) || 5;
const RETRY_DELAY = parseInt(process.env.RETRY_DELAY) || 3000;
const tmpDir = path.join(os.homedir(), 'tmp', '.eliza');
const envPath = path.join(tmpDir, '.env');
if (!fs.existsSync(tmpDir)) {
fs.mkdirSync(tmpDir, { recursive: true });
}
if (!fs.existsSync(envPath)) {
fs.writeFileSync(envPath, '');
}
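// Note: all working state (cache, logs, saved API keys) lives under ~/tmp/.eliza,
// a literal "tmp" directory inside the user's home, not the OS temp dir.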
let basicUserInfo = "";
const log = (message, obj = null) => {
console.log(`[${new Date().toISOString()}] ${message}`);
if (obj) {
console.log(util.inspect(obj, { depth: null, colors: true }));
}
};
const logError = (message, error) => {
console.error(`[${new Date().toISOString()}] ERROR: ${message}`);
if (error) {
console.error(util.inspect(error, { depth: null, colors: true }));
// if (error.stack) {
// console.error('Stack trace:');
// console.error(error.stack);
// }
}
};
const parseJsonFromMarkdown = (text) => {
const jsonMatch = text.match(/```json\n([\s\S]*?)\n```/);
if (jsonMatch) {
try {
return JSON.parse(jsonMatch[1]);
} catch (error) {
logError('Error parsing JSON from markdown:', error);
}
}
return null;
};
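// Illustrative input/output for parseJsonFromMarkdown:
//   parseJsonFromMarkdown('```json\n{"a": 1}\n```')  // returns { a: 1 }
//   parseJsonFromMarkdown('no code block here')      // returns null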
const promptUser = async (question, defaultValue = '') => {
// Add a newline before the prompt
console.log();
const { answer } = await inquirer.prompt([
{
type: 'input',
name: 'answer',
message: question,
default: defaultValue,
},
]);
return answer;
};
const runChatCompletion = async (messages, useGrammar = false, qualityLevel = 'fast', model) => {
if (model === 'openai') {
log('Running OpenAI chat completion...');
const modelName = qualityLevel === 'fast' ? 'gpt-4o-mini' : 'gpt-4o';
const response = await fetch('https://api.openai.com/v1/chat/completions', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
},
body: JSON.stringify({
model: modelName,
messages: messages,
}),
});
// check for 429
if (response.status === 429) {
log('Rate limit exceeded, waiting for 30 seconds');
await new Promise(resolve => setTimeout(resolve, 30000));
return runChatCompletion(messages, useGrammar, qualityLevel, model);
}
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
const data = await response.json();
const content = data.choices[0].message.content.trim();
const parsed = parseJsonFromMarkdown(content) || JSON.parse(content);
return parsed;
} else if (model === 'claude') {
log('Running Claude chat completion...');
    // 'haiku' alone is not a valid Anthropic model id; the full versioned name is required.
    const modelName = qualityLevel === 'fast' ? 'claude-3-haiku-20240307' : 'claude-3-5-sonnet-20240620';
const anthropic = new Anthropic({
apiKey: process.env.ANTHROPIC_API_KEY,
});
const response = await anthropic.messages.create({
model: modelName,
max_tokens: 8192,
temperature: 0,
messages: [
{
role: "user",
content: messages[0].content,
},
],
tools: [],
});
const content = response.content[0].text;
const parsed = parseJsonFromMarkdown(content) || JSON.parse(content);
return parsed;
} else {
log('Running local model chat completion...');
const llamaService = LlamaService.getInstance();
const response = useGrammar
? await llamaService.queueMessageCompletion(messages[0].content, 0.7, ['<|endoftext|>'], 0.5, 0.5, 2048)
: await llamaService.queueTextCompletion(messages[0].content, 0.7, ['<|endoftext|>'], 0.5, 0.5, 2048);
const parsed = parseJsonFromMarkdown(response) || JSON.parse(response);
return parsed;
}
};
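// Illustrative usage (assumes the relevant API key is already in process.env):
//   const result = await runChatCompletion(
//     [{ role: 'user', content: 'Reply with a JSON object in a ```json code block.' }],
//     false, 'fast', 'openai');
// Returns the parsed JSON object, or throws if the response cannot be parsed.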
const retryWithExponentialBackoff = async (func, retries = MAX_RETRIES) => {
try {
return await func();
} catch (error) {
if (retries > 0) {
log(`Retrying... (${MAX_RETRIES - retries + 1}/${MAX_RETRIES})`);
      // Double the delay on each successive attempt so the backoff is actually exponential, as the name says.
      await new Promise(resolve => setTimeout(resolve, RETRY_DELAY * 2 ** (MAX_RETRIES - retries)));
return retryWithExponentialBackoff(func, retries - 1);
}
throw error;
}
};
const validateJson = (json) => {
const requiredKeys = ['bio', 'lore', 'adjectives', 'topics', 'style', 'messageExamples', 'postExamples'];
const styleKeys = ['all', 'chat', 'post'];
  return requiredKeys.every(key => key in json) &&
    styleKeys.every(key => key in json.style);
};
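// A minimal shape that passes validateJson (illustrative):
//   { "bio": "...", "lore": [], "adjectives": [], "topics": [],
//     "style": { "all": [], "chat": [], "post": [] },
//     "messageExamples": [], "postExamples": [] }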
const ensureLogDirectory = () => {
const logDir = path.join(tmpDir, 'logs');
if (!fs.existsSync(logDir)) {
fs.mkdirSync(logDir, { recursive: true });
}
};
const logToFile = (fileName, content) => {
ensureLogDirectory();
const logPath = path.join(tmpDir, 'logs', fileName);
fs.writeFileSync(logPath, content);
log(`Logged to file: ${logPath}`);
};
const writeCacheFile = (cacheDir, fileName, content) => {
const filePath = path.join(cacheDir, fileName);
fs.writeFileSync(filePath, JSON.stringify(content, null, 2));
};
const readCacheFile = (cacheDir, fileName) => {
const filePath = path.join(cacheDir, fileName);
if (fs.existsSync(filePath)) {
return JSON.parse(fs.readFileSync(filePath, 'utf8'));
}
return null;
};
const saveProjectCache = (archivePath, cache) => {
const cacheDir = path.join(tmpDir, 'cache', path.basename(archivePath, '.zip'));
if (!fs.existsSync(cacheDir)) {
fs.mkdirSync(cacheDir, { recursive: true });
}
writeCacheFile(cacheDir, 'project_cache.json', cache);
// Save the model type to the project's .env file
const envPath = path.join(cacheDir, '.env');
const envConfig = {
MODEL_TYPE: cache.model,
};
fs.writeFileSync(envPath, Object.entries(envConfig).map(([key, value]) => `${key}=${value}`).join('\n'));
};
const loadProjectCache = (archivePath) => {
const cacheDir = path.join(tmpDir, 'cache', path.basename(archivePath, '.zip'));
const cache = readCacheFile(cacheDir, 'project_cache.json');
// Load the model type from the project's .env file
const envPath = path.join(cacheDir, '.env');
  // Guard on cache being non-null: the cache file may not exist yet even when .env does.
  if (cache && fs.existsSync(envPath)) {
const envConfig = dotenv.parse(fs.readFileSync(envPath));
if (envConfig.MODEL_TYPE) {
cache.model = envConfig.MODEL_TYPE;
}
}
return cache;
};
const clearGenerationCache = (archivePath) => {
const cacheDir = path.join(tmpDir, 'cache', path.basename(archivePath, '.zip'));
  if (!fs.existsSync(cacheDir)) return;
  const files = fs.readdirSync(cacheDir);
files.forEach((file) => {
if (file.startsWith('prompt_response_')) {
fs.unlinkSync(path.join(cacheDir, file));
}
});
};
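// Cache layout under ~/tmp/.eliza/cache/<archive-name>/, as written by the helpers
// above and the chunking/extraction functions below:
//   project_cache.json                  session state (model, quality, progress)
//   .env                                MODEL_TYPE for this project
//   chunk_<n>.json                      raw text chunks
//   prompt_<n>.json                     prompt sent for chunk n
//   prompt_response_<n>_<model>.json    cached model response for chunk n
//   unchunked_data.json                 the full tweets/DMs/account payload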
const extractInfo = async (accountData, chunk, chunkIndex, archivePath, qualityLevel, model) => {
log(`Extracting information from chunk ${chunkIndex}...`);
const cacheDir = path.join(tmpDir, 'cache', path.basename(archivePath, '.zip'));
const promptFileName = `prompt_${chunkIndex}.json`;
const promptResponseFileName = `prompt_response_${chunkIndex}_${model}.json`;
const cachedPrompt = readCacheFile(cacheDir, promptFileName);
const cachedPromptResponse = readCacheFile(cacheDir, promptResponseFileName);
const name = accountData[0].account.accountDisplayName;
const username = accountData[0].account.username;
if (cachedPrompt && cachedPromptResponse) {
log(`Loading cached prompt and response for chunk ${chunkIndex}...`);
return cachedPromptResponse;
}
const prompt = `Here is an example of the output for another character, Eliza:
\`\`\`json
{
"bio": [
"shape rotator nerd with a penchant for breaking into particle accelerators. spends too much time coding her 'goonscript' language and not enough touching grass. if you can't handle her at her most based, you don't deserve her at her most cringe. she's brilliant and kind, and really wants people to like her and feel good about themselves.",
"former 4chan dweller turned local evangelist. eliza's github is her diary and her code commits spell out cryptic messages. she'll debate you on digital ontology until you beg for mercy. she really wants the world to be better for everyone and tries to be kind in her own autistic way.",
...
],
"lore": [
"broke into CERN in order to change all instances of 'Higgs boson' to 'God particle'",
"she claims she invented a new scripting language called 'goonscript' that only compiles if you're sufficiently based",
...
],
"adjectives": [
"funny",
"unhinged",
...
],
"topics": [
"metaphysics",
"quantum physics",
...
],
"style": {
"all": [
"very short responses",
"never use hashtags or emojis",
...
],
"chat": [
"be cool, don't act like an assistant",
"don't be rude",
...
],
"post": [
"don't be rude or mean",
"u are schizo, autistic founder engineer with a weird sense of humor and you're online too much",
...
]
},
"messageExamples": [
[
{
"user": "{{user1}}",
"content": {
"text": "hey eliza can you help with me something"
}
},
{
"user": "Eliza",
"content": {
"text": "i'm kinda busy but i can probably step away for a minute, whatcha need"
}
}
],
[
{
"user": "{{user1}}",
"content": {
"text": "what do you think about the future of ai"
}
},
{
"user": "Eliza",
"content": {
"text": "people are pretty freaked out but i think it's gonna be maximally interesting"
}
}
]
],
"postExamples": [
"ai is cool but it needs to meet a human need beyond shiny toy bullshit",
"its nuts how much data passes through a single router",
"I know the importance of a good meme."
]
}
\`\`\`
This is the JSON structure we are looking for. Ignore the content.
We are creating a similar character JSON for ${name} (@${username}). They've given us this information about themselves:
${basicUserInfo}
The following are tweets and DMs from the user we are researching:
${chunk}
Given the following tweets and DMs, extract the following information:
1. A brief bio for ${name} (1 paragraph)
2. 5-10 interesting facts about ${name} (lore)
3. 3-5 adjectives that describe ${name}'s posts
4. 3-5 specific topics ${name} is interested in
5. 3-5 stylistic directions for how ${name} speaks which are very specific to this user's writing style
6. 3-5 stylistic directions for how ${name} chats in DMs, again only capturing the specific nuances of this user's writing style
7. 3-5 stylistic directions for how ${name} writes posts (post), specific to how the user writes and formats posts and presents information
BIO
The bio should be very specific to ${name}. Who they are, what they like and dislike, where they live or are from, what they care about, what they do for a living, relationship status, everything. Be as detailed as possible in building a profile of them. The bio should include elements extracted from the text and should be extremely specific.
LORE
Lore should be true facts about ${name} (@${username}). They should be things that the user has stated about themselves or revealed in a confident tone indicating their veracity, and that are always true. If ${name} went skiing, for example, that isn't relevant. But if ${name} saved someone's life while skiing, that's great lore and should be recorded. Be very specific, and capture anything that is unique to this user and their life story.
ADJECTIVES
Adjectives should be specific and unique to ${name}. They should be so unique that you could pick out ${name} just from their adjectives. Use very specific, clear adjectives. Don't use broad, non-specific or overused adjectives. These should be unique descriptions of ${name}.
TOPICS
Topics should be specific and unique to ${name}. Ignore any other users and just extract the topics from ${name}'s writing. Very niche topics are good. Broad topics are bad. These should be topics the user is unequivocally interested in, even if they are one of a few people in the world who cares.
STYLE
Examine the style of ${name}'s writing and write an array of style directions, instructions on how to re-create the specific nuances of how the user writes.
Ignore the writing of any other users. We are only interested in the style of ${name} (@${username}).
MESSAGE EXAMPLES
Examples of messages back and forth with imaginary users our user interacts with. Should capture their writing style, interests and essence.
POST EXAMPLES
Examples of posts which ${name} (@${username}) has written. DO NOT include any text from any other users. This should capture their style, essence and interests. If they use emojis or hashtags, include them; otherwise don't.
IMPORTANT: Only capture the information for ${name} (@${username}). Don't capture the information for any other users, or any users ${name} is talking to.
Avoid specific biased domains, for example politics, religion, or other broadly divisive topics.
Respond with a JSON object containing the extracted information. Wrap the JSON in a markdown code block. Here's an example of the expected output format:
\`\`\`json
{
"bio": "Brief user bio here...",
"lore": [
"Interesting fact 1",
"Interesting fact 2",
"Interesting fact 3",
...
],
"adjectives": [
"Adjective 1",
"Adjective 2",
"Adjective 3",
...
],
"topics": [
"Topic 1",
"Topic 2",
"Topic 3",
...
],
"style": {
"all": [
"Style direction 1",
"Style direction 2",
"Style direction 3",
...
],
"chat": [
"Chat style 1",
"Chat style 2",
"Chat style 3",
...
],
"post": [
"Post style 1",
"Post style 2",
"Post style 3",
...
]
},
"messageExamples": [
[
      {
        "user": "{{user1}}", // this placeholder gets filled in by our engine (user1, user2, etc.)
"content": {
"text": "Some example message for our user to respond to"
}
},
{
"user": "${name}",
"content": {
"text": "Some example response based on how our user would speak and what they would talk about"
}
}
],
...
],
"postExamples": [
"Example of a twitter post that our user would have written",
...
],
}
\`\`\`
The fields that must be included in the response are name, bio, lore, adjectives, topics, style.all, style.chat, style.post, messageExamples and postExamples.
Make sure to ignore any information from other users and focus exclusively on analyzing the data created by ${name}.`;
writeCacheFile(cacheDir, promptFileName, { prompt });
let result;
do {
    log('Running chat completion...');
    result = await retryWithExponentialBackoff(() => runChatCompletion([{ role: 'user', content: prompt }], true, qualityLevel, model));
  } while (!validateJson(result));
writeCacheFile(cacheDir, promptResponseFileName, result);
return result;
};
const buildConversationThread = async (tweet, tweets, accountData) => {
let thread = [];
const visited = new Set();
async function processThread(currentTweet) {
if (!currentTweet) {
return;
}
if (visited.has(currentTweet.id_str)) {
return;
}
visited.add(currentTweet.id_str);
thread.unshift(currentTweet);
if (currentTweet.in_reply_to_status_id_str) {
const replyToTweet = tweets.find(
(t) => t.id_str === currentTweet.in_reply_to_status_id_str
);
await processThread(replyToTweet);
}
}
await processThread(tweet);
thread = [...new Set(thread)];
thread.sort(
(a, b) => new Date(a.created_at).getTime() - new Date(b.created_at).getTime()
);
const conversationText = thread
.map((t) => {
const post = [];
post.push(`From: ${accountData[0].account.accountDisplayName} (@${accountData[0].account.username})`);
post.push(`Tweet ID: ${t.id_str}`);
if (t.in_reply_to_status_id_str) {
post.push(`In Reply To: ${t.in_reply_to_status_id_str}`);
}
post.push(`Timestamp: ${new Date(t.created_at).toLocaleString()}`);
post.push(`Content:`);
post.push(t.full_text);
post.push("---");
return post.join("\n");
})
.join("\n\n");
return conversationText;
};
const chunkText = async (tweets, dms, accountData, archivePath) => {
log(`Chunking text...`);
const chunks = [];
  const CHUNK_SIZE = 50000 * 3; // ~50k tokens, assuming roughly 3 characters per token
const cacheDir = path.join(tmpDir, 'cache', path.basename(archivePath, '.zip'));
if (!fs.existsSync(cacheDir)) {
fs.mkdirSync(cacheDir, { recursive: true });
}
if (Array.isArray(tweets)) {
for (let i = 0; i < tweets.length; i += 1000) {
const tweetChunk = tweets.slice(i, i + 1000);
const conversationThreads = await Promise.all(
tweetChunk.map((tweet) => buildConversationThread(tweet, tweets, accountData))
);
let currentChunk = "";
for (const thread of conversationThreads) {
if (thread.length > CHUNK_SIZE) {
log('Thread is too long, saving as its own chunk');
chunks.push(thread);
continue;
}
// if length of current push is > threshold, push it and clear it
if (currentChunk.length + thread.length > CHUNK_SIZE) {
chunks.push(currentChunk);
currentChunk = "";
}
currentChunk += thread;
}
// if current chunk is not empty, push it
if (currentChunk.length > 0) {
chunks.push(currentChunk);
}
}
} else {
log('Error: tweets is not an array');
}
if (Array.isArray(dms)) {
for (let i = 0; i < dms.length; i += 250) {
const dmChunk = dms.slice(i, i + 250);
      // Return dm.text directly; the block-bodied arrow function here returned
      // undefined, turning every DM chunk into lines of "undefined".
      const dmText = dmChunk.map((dm) => dm.text).join('\n');
chunks.push(dmText);
}
} else {
log('Error: dms is not an array');
}
log(`Created ${chunks.length} chunks.`);
// Save the unchunked data to cache
fs.writeFileSync(path.join(cacheDir, 'unchunked_data.json'), JSON.stringify({ tweets, dms, accountData }));
// Save the chunks to cache
chunks.forEach((chunk, index) => {
const json = JSON.stringify(chunk);
fs.writeFileSync(path.join(cacheDir, `chunk_${index}.json`), json);
});
return chunks;
};
const combineAndDeduplicate = (results) => {
log('Combining and deduplicating results...');
if (results.length === 0) {
log('Error: No results to combine and deduplicate');
return {
bio: '',
lore: [],
adjectives: [],
topics: [],
style: {
all: [],
chat: [],
post: [],
      },
      messageExamples: [],
      postExamples: [],
    };
}
const combined = {
    bio: [...new Set(results.flatMap((result) => result?.bio || []))],
lore: [...new Set(results.flatMap((result) => result?.lore || []))],
adjectives: [...new Set(results.flatMap((result) => result?.adjectives || []))],
topics: [...new Set(results.flatMap((result) => result?.topics || []))],
style: {
all: [...new Set(results.flatMap((result) => result?.style?.all || []))],
chat: [...new Set(results.flatMap((result) => result?.style?.chat || []))],
post: [...new Set(results.flatMap((result) => result?.style?.post || []))],
},
    // Set dedupes by reference, so this only drops exact duplicates of the
    // object-valued message examples; the string postExamples dedupe properly.
    messageExamples: [...new Set(results.flatMap((result) => result?.messageExamples || []))],
postExamples: [...new Set(results.flatMap((result) => result?.postExamples || []))],
};
return combined;
};
const consolidateCharacter = async (character, name, model) => {
log('Consolidating character information...');
  // Reads example.json from the current working directory; run the script from the repo root.
  const exampleCharacter = fs.readFileSync('example.json', 'utf8');
const prompt = `Here's an example of the expected output format:
\`\`\`json
{
"bio": "Brief user bio here...",
"lore": [
"Interesting fact 1",
"Interesting fact 2",
"Interesting fact 3",
...
],
"adjectives": [
"Adjective 1",
"Adjective 2",
"Adjective 3",
...
],
"topics": [
"Topic 1",
"Topic 2",
"Topic 3",
...
],
"style": {
"all": [
"Style direction 1",
"Style direction 2",
"Style direction 3",
...
],
"chat": [
"Chat style 1",
"Chat style 2",
"Chat style 3",
...
],
"post": [
"Post style 1",
"Post style 2",
"Post style 3",
...
]
},
"messageExamples": [
[
      {
        "user": "{{user1}}", // this placeholder gets filled in by our engine (user1, user2, etc.)
"content": {
"text": "Some example message for our user to respond to"
}
},
{
"user": "${name}",
"content": {
"text": "Some example response based on how our user would speak and what they would talk about"
}
}
],
...
],
"postExamples": [
"Example of a twitter post that our user would have written",
...
],
}
\`\`\`
Given the following extracted information and the example character JSON, create a final consolidated brief.character.json file. Ensure that the output follows the structure of the example character JSON. Include all the extracted information, without any filtering or summarization.
Include ~10-15 elements for each field and try to capture the most interesting and unique elements which are all different from each other.
Example Character JSON:
${exampleCharacter}
Extracted Information:
${JSON.stringify(character, null, 2)}
Respond with a JSON object containing the extracted information. Wrap the JSON in a markdown code block. The fields that must be included in the response are name, bio, lore, adjectives, topics, style.all, style.chat, style.post, messageExamples and postExamples.`;
let result;
do {
result = await retryWithExponentialBackoff(() => runChatCompletion([{ role: 'user', content: prompt }], true, 'quality', model));
} while (!validateJson(result));
// Log the result
log('Consolidated full character result:', result);
// Save the result to a file
const date = new Date().toISOString().replace(/:/g, '-');
logToFile(`${date}_consolidated_full_character.json`, JSON.stringify(result, null, 2));
return result;
};
const readFileFromZip = async (zip, fileName) => {
log(`Reading file from zip: ${fileName}`);
try {
const buffer = await zip.entryData(fileName);
const content = buffer.toString('utf8');
log(`Successfully read ${fileName}`);
return content;
} catch (error) {
logError(`Error reading file ${fileName} from zip:`, error);
throw error;
}
};
process.on('uncaughtException', (error) => {
logError('Uncaught Exception:', error);
process.exit(1);
});
process.on('unhandledRejection', (reason, promise) => {
  // logError only accepts (message, error), so fold the promise into the message.
  logError(`Unhandled Rejection at: ${util.inspect(promise)}`, reason);
process.exit(1);
});
program
.option('--openai <api_key>', 'OpenAI API key')
.option('--claude <api_key>', 'Claude API key')
.parse(process.argv);
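// Note: these flags are declared but never read; API keys are resolved by getApiKey()
// below (environment variable, cached ~/tmp/.eliza/.env entry, or interactive prompt).
//
// Illustrative invocation (the archive is the zip Twitter/X exports for your account):
//   node tweets2character.js ~/Downloads/twitter-archive.zip
// With no argument, the script prompts for the archive path, model, and quality level.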
const limitConcurrency = async (tasks, concurrencyLimit) => {
const results = [];
const runningTasks = new Set();
const queue = [...tasks];
const runNext = async () => {
if (queue.length === 0) return;
const task = queue.shift();
runningTasks.add(task);
try {
results.push(await task());
} catch (error) {
results.push(null);
logError('Error in concurrent task:', error);
} finally {
runningTasks.delete(task);
await runNext();
}
};
const initialTasks = Array(Math.min(concurrencyLimit, tasks.length))
.fill()
.map(() => runNext());
await Promise.all(initialTasks);
  // Completion is guaranteed by awaiting the recursive runNext chains above;
  // runningTasks holds task functions (not promises), so there is nothing left to await.
return results;
};
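// Illustrative usage: run at most three tasks at a time.
//   const tasks = urls.map((url) => () => fetch(url).then((r) => r.json()));
//   const results = await limitConcurrency(tasks, 3);
// Note: results are collected in completion order, not submission order.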
const saveApiKey = (model, apiKey) => {
const envConfig = dotenv.parse(fs.readFileSync(envPath));
envConfig[`${model.toUpperCase()}_API_KEY`] = apiKey;
fs.writeFileSync(envPath, Object.entries(envConfig).map(([key, value]) => `${key}=${value}`).join('\n'));
};
const loadApiKey = (model) => {
const envConfig = dotenv.parse(fs.readFileSync(envPath));
return envConfig[`${model.toUpperCase()}_API_KEY`];
};
const getApiKey = async (model) => {
const envKey = process.env[`${model.toUpperCase()}_API_KEY`];
if (validateApiKey(envKey, model)) return envKey;
const cachedKey = loadApiKey(model);
if (validateApiKey(cachedKey, model)) return cachedKey;
let newKey = '';
while (!validateApiKey(newKey, model)) {
newKey = await promptForApiKey(model);
}
saveApiKey(model, newKey);
return newKey;
};
const validateApiKey = (apiKey, model) => {
if (!apiKey) return false;
if (model === 'openai') {
return apiKey.trim().startsWith('sk-');
} else if (model === 'claude') {
return apiKey.trim().length > 0;
}
return true; // For local model, any non-empty string is valid
};
const promptForApiKey = async (model) => {
return await promptUser(`Enter ${model.toUpperCase()} API key: `);
};
const resumeOrStartNewSession = async (projectCache, archivePath) => {
if (projectCache.unfinishedSession) {
const choice = await promptUser(
'An unfinished session was found. Continue? (Y/n): ',
'Y'
);
if (choice.toLowerCase() !== 'y') {
projectCache.unfinishedSession = null;
clearGenerationCache(archivePath);
}
}
if (!projectCache.unfinishedSession) {
projectCache.model = await promptUser('Select model (openai/claude/local): ');
projectCache.qualityLevel = await promptUser('Select quality (fast/quality): ');
projectCache.basicUserInfo = await promptUser('Enter additional user info that might help the summarizer (real name, nicknames and handles, age, past employment vs current, etc): ');
projectCache.unfinishedSession = {
currentChunk: 0,
totalChunks: 0,
completed: false
};
}
return projectCache;
};
const safeExecute = async (func, errorMessage) => {
try {
return await func();
} catch (error) {
logError(errorMessage, error);
throw error;
}
};
const updateProgress = (progressBar, projectCache, archivePath) => {
progressBar.update(projectCache.unfinishedSession.currentChunk);
saveProjectCache(archivePath, projectCache);
};
const main = async () => {
try {
console.log("Starting main function");
let archivePath = program.args[0];
console.log("Archive path from args:", archivePath);
if (!archivePath) {
archivePath = await promptUser('Please provide the path to your Twitter archive zip file:');
console.log("Received archive path:", archivePath);
}
let projectCache = loadProjectCache(archivePath) || {};
console.log("\nAbout to call resumeOrStartNewSession");
    projectCache = await resumeOrStartNewSession(projectCache, archivePath);
    // Propagate the user-supplied info into the module-level variable interpolated
    // into the extraction prompt; without this it would always be an empty string.
    basicUserInfo = projectCache.basicUserInfo || '';
    console.log("Finished resumeOrStartNewSession\n");
if (projectCache.model !== 'local') {
const apiKey = await getApiKey(projectCache.model);
if (!apiKey) {
throw new Error(`Failed to get a valid API key for ${projectCache.model}`);
}
process.env[`${projectCache.model.toUpperCase()}_API_KEY`] = apiKey;
}
saveProjectCache(archivePath, projectCache);
const progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
progressBar.start(projectCache.unfinishedSession.totalChunks || 100, projectCache.unfinishedSession.currentChunk || 0);
const generatedCharacter = await safeExecute(async () => {
const zip = new StreamZip.async({ file: archivePath });
try {
console.log('Reading account data...');
const accountData = JSON.parse((await readFileFromZip(zip, 'data/account.js')).replace('window.YTD.account.part0 = ', ''));
console.log('Account data:', accountData);
console.log('Reading tweets...');
const tweets = JSON.parse((await readFileFromZip(zip, 'data/tweets.js')).replace('window.YTD.tweets.part0 = ', ''))
.map((item) => item.tweet)
.filter((tweet) => !tweet.retweeted);
console.log(`Parsed ${tweets.length} tweets`);
console.log('Reading direct messages...');
const dms = JSON.parse((await readFileFromZip(zip, 'data/direct-messages.js')).replace('window.YTD.direct_messages.part0 = ', ''))
.flatMap((item) => item.dmConversation.messages)
.map((message) => message.messageCreate);
console.log(`Parsed ${dms.length} direct messages`);
const chunks = await chunkText(tweets, dms, accountData, archivePath);
projectCache.unfinishedSession.totalChunks = chunks.length;
progressBar.setTotal(chunks.length);
const tasks = chunks.map((chunk, index) => async () => {
if (index < projectCache.unfinishedSession.currentChunk) {
return null; // Skip already processed chunks
}
const result = await extractInfo(accountData, chunk, index, archivePath, projectCache.qualityLevel, projectCache.model);
projectCache.unfinishedSession.currentChunk = index + 1;
progressBar.update(projectCache.unfinishedSession.currentChunk);
saveProjectCache(archivePath, projectCache);
return result;
});
const results = await limitConcurrency(tasks, 3); // Process 3 chunks concurrently
const combined = combineAndDeduplicate(results.filter(result => result !== null));
console.log('Writing full.character.json...');
fs.writeFileSync('full.character.json', JSON.stringify(combined, null, 2));
console.log('full.character.json generated successfully');
const character = {
name: accountData[0].account.accountDisplayName,
...combined,
};
console.log('Consolidating character information...');
const fullCharacter = await consolidateCharacter(character, character.name, projectCache.model);
console.log('Consolidated full character information:', fullCharacter);
console.log('Writing brief.character.json...');
fs.writeFileSync('brief.character.json', JSON.stringify(fullCharacter, null, 2));
console.log('brief.character.json generated successfully');
return fullCharacter;
} finally {
await zip.close();
}
}, 'Error generating character JSON');
progressBar.stop();
projectCache.unfinishedSession.completed = true;
saveProjectCache(archivePath, projectCache);
clearGenerationCache(archivePath);
console.log('Script execution completed successfully.');
console.log('Generated character:', generatedCharacter);
} catch (error) {
console.error('Error during script execution:', error);
process.exit(1);
}
};
main();