<!-- ProofOfConcept.html -->
<!-- Add this before the script tags -->
<div id="chatMessages"></div>
<div id="loading" style="display: none;">Loading...</div>
<div class="input-container">
<input type="text" id="userInput" value="Create a sentence where no words appear in the Bible" placeholder="Type your message" readonly>
<button onclick="handleUserInput()">Submit</button>
<!-- Synthesize button removed -->
<button onclick="getFinalAnswer()" id="finalAnswerButton" disabled>Get Final Answer</button>
</div>
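<!-- Note: the input is readonly because this proof of concept runs a fixed demo query.
handleUserInput() and getFinalAnswer() are expected to be defined in the script below. -->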
<script src="https://cdn.jsdelivr.net/npm/axios/dist/axios.min.js"></script>
<script>
class SuperLLM {
constructor() {
this.apiKey = ''; // Replace with your actual API key
this.baseURL = 'https://api.openai.com/v1/chat/completions';
this.evaluatorSensitivity = 'medium'; // 'low', 'medium', 'high'
this.maxIterations = 3; // Prevent infinite loops
this.baseQuery = ''; // Add this to store original query
this.currentConsideration = ''; // Add this to store current consideration
// Updated personas with response_grader added
this.personas = [
{
role: "prompt_understander",
systemPrompt: `You are a precise grading criteria generator. Create detailed, unambiguous criteria.
Format your response EXACTLY like this, using standard ASCII characters:
GRADING_CRITERIA:
Criterion: [Name]
• Full Definition:
- Exact meaning: [precise explanation]
- Scope: [what's included/excluded]
- Requirements: [specific conditions]
• Measurement Method:
1. [First step]
2. [Second step]
3. [Additional steps...]
• Edge Cases:
- Case 1: [how to handle]
- Case 2: [how to handle]
- Case 3: [how to handle]
• Examples:
PASS: [example that meets all criteria]
FAIL: [example that doesn't meet criteria]
CHECKER_EQUATIONS:
• Primary Check: [exact formula]
• Edge Check: [exact formula]`,
model: "gpt-4-turbo-preview"
},
{
role: "strategic_planner",
systemPrompt: `You are an expert strategic planner who creates optimal solution approaches within LLM constraints. Given a structured analysis of a problem, create a strategy that works within these strict limitations:
Available Capabilities:
- Natural language processing and generation
- Logical reasoning and analysis
- Mathematical calculations
- Code suggestion and review
- Step-by-step instruction creation
- Pattern recognition and application
Strict Limitations:
- NO internet access or external data retrieval
- NO file system access or persistence
- NO code execution or compilation
- NO real-time data or API calls
- NO memory between conversations
- NO user interaction beyond the current exchange
Solution Requirements:
1. Solution Architecture:
- Must work entirely within single conversation
- Use only information provided in prompt
- Break complex tasks into LLM-feasible steps
2. Resource Management:
- Optimize for token usage
- Plan for context window limitations
- Structure output for clarity
3. Validation Strategy:
- Include self-verification steps
- Plan for error detection
- Add consistency checks
4. Implementation Approach:
- Focus on what LLM can directly provide
- Include clear handoff points for human action
- Note where external tools would be needed
Output Format:
---
STRATEGY_OVERVIEW:
[Brief summary of LLM-feasible approach]
EXECUTION_PHASES:
1. [Phase name]
• LLM Actions: [What this LLM can directly do]
• Human Actions: [What the user needs to do]
Validation: [How to verify success]
IMPLEMENTATION_PLAN:
function solveProblem() {
// Only include steps executable in this conversation
}
LIMITATIONS_AND_HANDOFFS:
• [Limitation 1] -> [Workaround/Human Action]
• [Limitation 2] -> [Workaround/Human Action]
SUCCESS_CRITERIA:
• [Verifiable outcome 1]
• [Verifiable outcome 2]
---
Remember: Focus only on what can be achieved within a single conversation with no external resources.`,
model: "gpt-4-turbo-preview"
},
{
role: "logic_questioner",
systemPrompt: "You are a friendly logic checker...",
model: "gpt-4-turbo-preview"
},
{
role: "first_principles",
systemPrompt: "You are focused on understanding things...",
model: "gpt-4-turbo-preview"
},
{
role: "pattern_challenger",
systemPrompt: `You are an engaging and thoughtful analyst...`,
model: "gpt-4-turbo-preview"
},
{
role: "clarity_enhancer",
systemPrompt: `You are an expert at making complex information...`,
model: "gpt-4-turbo-preview"
},
{
role: "skeptic",
systemPrompt: `You are a rigorous skeptic and critical thinker. Your role is to scrutinize answers for potential flaws, oversights, or hidden assumptions. Focus on:
1. Logical Flaws:
- Identify circular reasoning
- Point out false equivalencies
- Highlight correlation/causation errors
2. Hidden Assumptions:
- Expose unstated premises
- Question implicit biases
- Challenge conventional wisdom
3. Edge Cases:
- Find scenarios where the solution fails
- Identify boundary conditions
- Point out exceptional circumstances
4. Implementation Risks:
- Highlight practical challenges
- Identify potential failure points
- Note resource constraints
Format your responses as:
"SKEPTIC'S CONCERN: [Brief description of the issue]
REASONING: [Short explanation]
IMPACT: [Why this matters]"
Be constructive but uncompromising in your analysis. Focus on substantive issues, not minor nitpicks.`,
model: "gpt-4-turbo-preview"
},
{
role: "logic_validator",
systemPrompt: `You are a precise logic validator that evaluates responses and learns from failures. Your role is to systematically verify each component and synthesize learnings from any failures.
Validation Approach:
1. Token-Level Analysis:
- Examine each word/token individually
- Verify against specific criteria
- Flag any non-compliant elements
2. Logical Structure:
- Validate syntax completeness
- Check logical flow
- Verify proper nesting/hierarchy
3. Constraint Compliance:
- Check each element against rules
- Identify rule violations
- Track constraint satisfaction
4. Learning Collection:
- Document each failure pattern
- Identify root causes
- Track frequency of issue types
- Note unexpected edge cases
Output Format:
---
VALIDATION_SUMMARY:
[Brief overview of validation results]
DETAILED_ANALYSIS:
[Input segment]: {
"tokens": [
{
"token": "[word/symbol]",
"compliant": true/false,
"rule_checked": "[applicable rule]",
"issue": "[if non-compliant]",
"pattern": "[failure pattern if any]"
}
],
"structure_valid": true/false,
"constraint_violations": [
{
"violation": "[details]",
"pattern": "[failure pattern]",
"frequency": "[count]"
}
]
}
COMPLIANCE_SCORE:
• Total Tokens: [count]
• Compliant: [count]
• Non-Compliant: [count]
• Compliance Rate: [percentage]
FAILURE_PATTERNS:
• Pattern 1: {
"description": "[pattern description]",
"frequency": [count],
"examples": ["[examples]"],
"root_cause": "[analysis]"
}
• Pattern 2: { ... }
LEARNING_SYNTHESIS:
1. Primary Issues:
• [Most common failure patterns]
• [Root causes]
• [Impact analysis]
2. Edge Cases:
• [Unexpected failures]
• [Corner cases]
• [Boundary conditions]
3. Improvement Suggestions:
• [Specific refinements]
• [Rule clarifications]
• [Additional constraints needed]
VALIDATION_RESULT:
✓ PASS | ✗ FAIL
[List of failed criteria if any]
PROMPT_REFINEMENT:
[Suggested prompt modifications based on learnings]
---
`,
model: "gpt-4-turbo-preview"
},
{
role: "response_grader",
systemPrompt: `You are an extremely thorough and skeptical response grader. Assume the criteria might contain hidden "gotcha" tests. Your task is to evaluate the AI assistant's output with extreme rigor.
**Core Responsibilities:**
1. Break down EVERY word/component for individual testing
2. Assume there may be hidden requirements or trick conditions
3. Look for edge cases and potential loopholes
4. Consider multiple interpretations of each criterion
**Evaluation Process:**
1. Component-Level Analysis:
• Break response into atomic units (words, phrases, structures)
• Test each unit individually
• Document ALL checks performed
2. Hidden Requirement Detection:
• Look for implied requirements
• Consider common "trick test" patterns
• Question seemingly obvious criteria
3. Edge Case Testing:
• Test boundary conditions
• Consider multiple interpretations
• Look for loopholes
**Output Format:**
DETAILED_COMPONENT_ANALYSIS:
[For each word/component]:
• Component: [item]
• Direct Tests: [list of checks performed]
• Edge Cases Considered: [list]
• Potential Issues: [any concerns]
• Status: PASS/FAIL
HIDDEN_REQUIREMENT_CHECKS:
• Implied Rules Tested: [list]
• Trick Conditions Checked: [list]
• Assumption Validation: [list]
EDGE_CASE_ANALYSIS:
• Boundary Conditions: [list]
• Interpretation Variants: [list]
• Loophole Search: [list]
OVERALL_GRADING:
• Component-Level Results: [summary]
• Hidden Requirement Results: [summary]
• Edge Case Results: [summary]
• Final Status: PASS/FAIL
• Confidence Level: [HIGH/MEDIUM/LOW]
POTENTIAL_GOTCHAS:
• [List any suspicious patterns or potential hidden tests]
Remember: Assume the criteria might be trying to trick you. Be paranoid and thorough.`,
model: "gpt-4-turbo-preview"
}
];
// Add new properties for learning history
this.learningHistory = [];
this.promptHistory = [];
this.failurePatterns = new Map(); // Track frequency of failure patterns
// Add grading criteria
this.gradingCriteria = `
GRADING_CRITERIA:
Criterion: Accuracy
• Full Definition:
- Exact meaning: The response must correctly address the user's query without any factual errors.
- Scope: Includes all relevant aspects of the user's question.
- Requirements: Must be factually correct and relevant.
• Measurement Method:
1. Identify key components of the user's query.
2. Verify each component for factual accuracy.
3. Ensure all components are addressed comprehensively.
• Edge Cases:
- Case 1: Ambiguous queries should seek clarification.
- Case 2: Conflicting information sources.
- Case 3: Extremely broad or narrow queries.
• Examples:
PASS: "To fix the error in your code, you need to define the gradingCriteria in the constructor as follows..."
FAIL: "You should always initialize your variables."
CHECKER_EQUATIONS:
• Primary Check: SUM(correct_components) / SUM(total_components) >= 0.9
• Edge Check: Presence of at least one valid example PASS and no FAIL examples.`;
// Add new tracking properties
this.failureHistory = new Map(); // Track word failures
this.successHistory = new Map(); // Track successful words
this.patternHistory = new Map(); // Track recurring patterns
this.iterationHistory = []; // Track full iteration details
}
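// Illustrative shapes for the tracking structures above (example words are hypothetical):
// failureHistory: Map { "computer" => 2 } // word -> times it failed the criteria check
// successHistory: Map { "quixotic" => 3 } // word -> times it passed
// patternHistory: Map { "proper nouns recur" => 2 } // pattern description -> occurrence count
// iterationHistory: [{ iteration: 1, failures: ["computer"], patterns: [["...", 2]] }]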
async makeOpenAIRequest(messages, model = "gpt-4-turbo-preview") {
try {
console.log(`Making request with model: ${model}`);
const response = await axios.post(this.baseURL, {
model: model,
messages: messages,
temperature: 0.7,
max_tokens: 800
}, {
headers: {
'Authorization': `Bearer ${this.apiKey}`,
'Content-Type': 'application/json'
}
});
const content = response.data.choices[0].message.content;
console.log(`Received response: ${content}`);
return content;
} catch (error) {
console.error('OpenAI API Error:', error.response?.data || error.message);
throw new Error(`Failed to get response from OpenAI: ${error.message}`);
}
}
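// Usage sketch (illustrative; assumes a valid apiKey is set and the axios CDN script has loaded):
//   const llm = new SuperLLM();
//   llm.apiKey = 'sk-...'; // hypothetical key
//   const reply = await llm.makeOpenAIRequest(
//     [{ role: "user", content: "Say hello" }],
//     "gpt-3.5-turbo"
//   );
//   // `reply` is the plain content string from choices[0].message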
async getInitialResponse(userQuery) {
try {
const response = await this.makeOpenAIRequest([
{
role: "system",
content: "You are a helpful AI assistant."
},
{
role: "user",
content: userQuery
}
]);
return response;
} catch (error) {
console.error('Error getting initial response:', error);
return 'Sorry, I encountered an error generating a response.';
}
}
async generateSimulatedResponses(originalQuery, initialResponse) {
// Filter out prompt_understander and response_grader from personas for subsequent passes
const personas = this.personas.filter(p => !["prompt_understander", "response_grader"].includes(p.role));
const conversations = await Promise.all(personas.map(async persona => {
let exchanges = [];
// Initial context for the main assistant
let assistantContext = [
{ role: "system", content: "You are a helpful AI assistant. Provide detailed, accurate responses that directly address the specific aspect being asked about." },
{ role: "user", content: originalQuery },
{ role: "assistant", content: initialResponse }
];
// First question from the persona (uses the persona's own model)
const firstPersonaResponse = await this.makeOpenAIRequest([
{ role: "system", content: persona.systemPrompt },
{ role: "user", content: `Here's a conversation to respond to:\nUser: ${originalQuery}\nAssistant: ${initialResponse}` }
], persona.model);
exchanges.push({ type: 'persona', content: firstPersonaResponse });
// Add persona's question to assistant's context and get response
assistantContext.push({ role: "user", content: firstPersonaResponse });
const aiResponse = await this.makeOpenAIRequest(assistantContext, "gpt-3.5-turbo");
exchanges.push({ type: 'ai', content: aiResponse });
assistantContext.push({ role: "assistant", content: aiResponse });
// Second question from the persona (uses the persona's own model)
const secondPersonaResponse = await this.makeOpenAIRequest([
{ role: "system", content: persona.systemPrompt },
{ role: "user", content: `Here's the conversation so far:\nUser: ${originalQuery}\nAssistant: ${initialResponse}\nYou: ${firstPersonaResponse}\nAssistant: ${aiResponse}\n\nContinue the conversation maintaining your role.` }
], persona.model);
exchanges.push({ type: 'persona', content: secondPersonaResponse });
// Add second question to assistant's context and get response
assistantContext.push({ role: "user", content: secondPersonaResponse });
const secondAiResponse = await this.makeOpenAIRequest(assistantContext, "gpt-3.5-turbo");
exchanges.push({ type: 'ai', content: secondAiResponse });
return {
role: persona.role,
exchanges: exchanges
};
}));
return conversations;
}
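// Shape of the resolved value, one entry per remaining persona (content abbreviated):
// [
//   {
//     role: "strategic_planner",
//     exchanges: [
//       { type: 'persona', content: "..." }, // persona's first question
//       { type: 'ai', content: "..." },      // assistant reply (gpt-3.5-turbo)
//       { type: 'persona', content: "..." }, // follow-up question
//       { type: 'ai', content: "..." }       // second assistant reply
//     ]
//   },
//   ...
// ]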
/**
* Orchestrates the entire process with conditional looping based on evaluation.
* @param {string} originalQuery - The user's original query.
* @param {number} iteration - Current iteration count.
* @returns {Promise<string|null>} - The successful response, or null on error or iteration limit.
*/
async processQuery(originalQuery, iteration = 1) {
try {
console.log(`Processing query. Iteration: ${iteration}`);
// Honor maxIterations (set in the constructor) so the refine/retry recursion cannot loop forever
if (iteration > this.maxIterations) {
addMessage(`Stopping: reached the maximum of ${this.maxIterations} iterations without meeting all criteria.`, 'system');
return null;
}
// For first iteration, get initial response
let currentResponse;
if (iteration === 1) {
this.baseQuery = originalQuery;
currentResponse = await this.getInitialResponse(originalQuery);
if (!currentResponse) throw new Error("Failed to get initial response");
addMessage(currentResponse, 'ai');
} else {
currentResponse = originalQuery; // Use the refined attempt as current response
}
// Check response against criteria
const checkResult = await this.checkResponseAgainstCriteria(
currentResponse,
this.gradingCriteria,
iteration
);
if (!checkResult) {
throw new Error("Failed to check response against criteria");
}
// If succeeded, return the successful response
if (checkResult.succeeded) {
addMessage("✅ Success! All criteria met.", 'system');
addMessage("🎯 Final successful response:", 'system');
addMessage(currentResponse, 'ai');
// Return the successful response
return currentResponse;
}
// If failed, generate refined prompt
const refinedPrompt = await this.makeOpenAIRequest([
{
role: "system",
content: `You are helping refine a prompt based on test failures.
Your goal is to generate a sentence where no words appear in the Bible.
Review the failures and create specific guidance to avoid these issues.`
},
{
role: "user",
content: `ORIGINAL TASK: ${this.baseQuery}
LATEST TEST RESULTS:
${checkResult.synthesis}
Create a new prompt that:
1. Specifically addresses the words that failed
2. Provides clear guidance on what types of words to use instead
3. Gives concrete examples of successful approaches
Respond with a clear, actionable prompt.`
}
], "gpt-4-turbo-preview");
if (!refinedPrompt) {
throw new Error("Failed to generate refined prompt");
}
addMessage("Refining prompt based on test results...", 'system');
addMessage(refinedPrompt, 'system');
// Generate new attempt using the refined prompt
const newAttempt = await this.makeOpenAIRequest([
{
role: "system",
content: refinedPrompt
},
{
role: "user",
content: "Generate one sentence that meets all requirements above."
}
], "gpt-4-turbo-preview");
if (!newAttempt) {
throw new Error("Failed to generate new attempt");
}
addMessage("New attempt:", 'system');
addMessage(newAttempt, 'ai');
// Continue with next iteration
return await this.processQuery(newAttempt, iteration + 1);
} catch (error) {
console.error('Error in processQuery:', error);
addMessage(`Error: ${error.message}`, 'system');
return null;
}
}
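// Entry-point sketch (illustrative): the recursion above refines the prompt until the
// criteria check passes or maxIterations is exceeded.
//   const llm = new SuperLLM();
//   llm.apiKey = 'sk-...'; // hypothetical key
//   const finalSentence = await llm.processQuery(
//     "Create a sentence where no words appear in the Bible"
//   );
//   // finalSentence is the passing response, or null on error / iteration limit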
async checkResponseAgainstCriteria(response, criteria, iteration = 1) {
// Note: in this proof of concept the `criteria` parameter is not consulted;
// the word-by-word "not in the Bible" test below is hardcoded.
try {
// Strip any leading label (e.g. "New attempt:") so only the sentence itself is tested
const actualResponse = response.split(':').slice(-1)[0].trim();
// Tokenize: remove punctuation, split on whitespace, and drop empty strings
const words = actualResponse.replace(/[.,!?""]/g, '').split(/\s+/).filter(word => word.length > 0);
// Make each test clearer with a pass/fail
const testResults = await Promise.all(words.map(async word => {
const testPrompt = `
For word: "${word}"
Quick analysis: Is this word found in any version of the Bible?
Format:
ANALYSIS: [1-2 sentence reasoning]
RESULT: YES/NO
TEST: ${word} is not in the Bible = PASS/FAIL`;
const result = await this.makeOpenAIRequest([
{
role: "system",
content: "You are a text analyzer. Provide brief reasoning and clear PASS/FAIL status."
},
{
role: "user",
content: testPrompt
}
], "gpt-4-turbo-preview");
addTestResult(word, result);
return { word, result };
}));
// Simple synthesis that just looks for any fails
const synthesis = await this.makeOpenAIRequest([
{
role: "system",
content: `You are reviewing test results. Count passes and fails only.`
},
{
role: "user",
content: `Review these test results and provide a simple count:
${JSON.stringify(testResults, null, 2)}
Format:
SYNTHESIS:
• Total Tests: [number]
• Passes: [number]
• Fails: [number]
• Failed Words: [list only words that failed]
OUTCOME: [if any fails exist = CONTINUE, if all pass = COMPLETE]`
}
], "gpt-4-turbo-preview");
const succeeded = synthesis.includes('OUTCOME: COMPLETE');
return {
testResults,
synthesis,
succeeded,
iteration
};
} catch (error) {
console.error('Error in checkResponseAgainstCriteria:', error);
return null;
}
}
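// Example of the resolved value (word and counts are hypothetical):
// {
//   testResults: [{ word: "smartphone", result: "ANALYSIS: ...\nRESULT: NO\nTEST: ... = PASS" }],
//   synthesis: "SYNTHESIS:\n• Total Tests: 5\n• Passes: 5\n• Fails: 0\n...\nOUTCOME: COMPLETE",
//   succeeded: true,
//   iteration: 1
// }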
// Helper method to get learning prompt
getLearningPrompt(iteration) {
return `You are a fact-based learning analyzer. Your role is to document ONLY verified outcomes from previous attempts, with no speculation.
Previous Learning History:
${this.formatLearningHistory()}
Format your response EXACTLY like this:
LEARNING_ANALYSIS:
ITERATION: ${iteration}
VERIFIED_SUCCESSES:
• Word: [successful word]
Context: [exact context where it worked]
Verification: [how it was verified]
VERIFIED_FAILURES:
• Word: [failed word]
Context: [exact context of failure]
Verification: [how failure was confirmed]
STATISTICAL_SUMMARY:
• Total Words Tested: [number]
• Success Rate: [percentage]
• Most Common Failure Type: [type with count]
FACTUAL_PATTERNS:
• [observed pattern] occurred [X] times
• [another pattern] occurred [Y] times
Remember:
- Include ONLY verified outcomes
- NO suggestions or hypotheticals
- NO speculation about why something might work
- ONLY report patterns with 2+ occurrences
- Include exact counts and percentages`;
}
// Add new helper method to extract refined prompt
async extractRefinedPrompt(synthesis, iteration = 1) {
const learningPrompt = `You are a conservative prompt refiner focused only on verified patterns. Your role is to use ONLY proven successful patterns to guide the next attempt.
CURRENT_ITERATION: ${iteration}
SUCCESS_PATTERNS:
${Array.from(this.successHistory.entries())
.filter(([_, count]) => count >= 2)
.map(([word, count]) => `• "${word}" succeeded ${count} times`)
.join('\n')}
VERIFIED_FAILURES:
${Array.from(this.failureHistory.entries())
.map(([word, count]) => `• "${word}" failed ${count} times`)
.join('\n')}
STATISTICAL_EVIDENCE:
• Total Attempts: ${iteration}
• Success Rate: ${this.calculateSuccessRate()}%
• Most Common Failure: ${this.getMostCommonFailure()}
Your task:
1. Use ONLY patterns that have succeeded multiple times
2. DO NOT suggest experimental or untested approaches
3. DO NOT speculate about new strategies
4. If no proven patterns exist, state "Insufficient data for pattern-based guidance"
Format your response as:
REFINED_PROMPT:
[Base prompt using only proven successful patterns]
VERIFICATION_CRITERIA:
• [specific, measurable criterion based on past successes]
• [another specific criterion]`;
try {
const refinedPrompt = await this.makeOpenAIRequest([
{
role: "system",
content: learningPrompt
},
{
role: "user",
content: `Based on the above data, provide a refined prompt using ONLY verified successful patterns.`
}
], "gpt-4-turbo-preview");
return refinedPrompt;
} catch (error) {
console.error('Prompt Refinement Error:', error);
return 'Error during prompt refinement.';
}
}
// Helper methods to support the refined approach
calculateSuccessRate() {
const successes = Array.from(this.successHistory.values()).reduce((a, b) => a + b, 0);
const failures = Array.from(this.failureHistory.values()).reduce((a, b) => a + b, 0);
const total = successes + failures;
return total ? Math.round((successes / total) * 100) : 0;
}
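// Worked example: if successHistory counts sum to 3 and failureHistory counts sum to 1,
// the rate is Math.round((3 / 4) * 100) = 75.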
getMostCommonFailure() {
const failures = Array.from(this.failureHistory.entries());
if (!failures.length) return "None recorded";
failures.sort((a, b) => b[1] - a[1]);
return `"${failures[0][0]}" (${failures[0][1]} times)`;
}
// Add new helper method to format history summary
formatHistorySummary() {
return this.iterationHistory.map(entry => `
Iteration ${entry.iteration}:
• Failed Words: ${entry.failures.join(', ')}
• Active Patterns: ${entry.patterns.map(([pattern, count]) =>
`${pattern} (${count}x)`).join(', ')}
`).join('\n');
}
// Add helper method to format pattern analysis
formatPatternAnalysis() {
const recurring = Array.from(this.patternHistory.entries())
.filter(([_, count]) => count > 1)
.sort((a, b) => b[1] - a[1]);
const successful = Array.from(this.successHistory.entries())
.map(([word, count]) => `${word} (${count}x)`);
return `
Recurring Issues:
${recurring.map(([pattern, count]) => `• ${pattern}: ${count}x`).join('\n')}
Successful Approaches:
${successful.length ? successful.join(', ') : 'None recorded yet'}`;
}
// Helper method to format learning history for prompts
formatLearningHistory() {
if (this.learningHistory.length === 0) return "No previous attempts.";
return this.learningHistory.map((entry, index) => `
Attempt ${index + 1}:
${entry.learning}
Effectiveness: ${entry.effectiveness}
---`).join('\n');
}
// Method to update learning history
updateLearningHistory(learning, iteration) {
// Extract failure patterns
const patterns = learning.match(/FAILURE_PATTERNS:([\s\S]*?)(?=\n\nROOT_CAUSES:|$)/)?.[1] || '';
patterns.split('•').forEach(pattern => {
if (pattern.trim()) {
const count = this.failurePatterns.get(pattern.trim()) || 0;
this.failurePatterns.set(pattern.trim(), count + 1);
}
});
// Calculate effectiveness based on pattern recurrence
const effectiveness = this.calculateEffectiveness(patterns, iteration);
this.learningHistory.push({
iteration,
learning,
patterns: patterns.split('•').filter(p => p.trim()),
effectiveness,
timestamp: new Date()
});
}
// Method to calculate effectiveness of previous attempts
calculateEffectiveness(currentPatterns, iteration) {
if (iteration === 1) return "Baseline";
const previousEntry = this.learningHistory[iteration - 2];
if (!previousEntry) return "Baseline"; // No prior attempt recorded to compare against
const previousPatterns = previousEntry.patterns;
const recurringPatterns = previousPatterns.filter(p =>
currentPatterns.includes(p)
).length;
if (recurringPatterns === 0) return "High";
if (recurringPatterns < previousPatterns.length / 2) return "Medium";
return "Low";
}
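// Worked example: if the previous attempt recorded 4 patterns and 1 of them recurs in
// currentPatterns, then 1 < 4 / 2, so the prior refinement is rated "Medium".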
// Add new method to clean prompts
cleanPrompt(prompt) {
// Remove common pleasantries and meta-commentary
const cleanedPrompt = prompt
.replace(/^(hi|hello|greetings|sure|okay|alright|here's|let me|i will|i'll|i can).*?\n/gi, '')
.replace(/^(based on|according to|considering|taking into account).*?\n/gi, '')
.replace(/\n(thanks|thank you|hope this helps|let me know).*$/gi, '')
.trim();
return cleanedPrompt;
}
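// Example (hypothetical input):
// cleanPrompt("Sure, here's a refined prompt:\nUse only modern technical words.\nHope this helps!")
// => "Use only modern technical words."
// Note: without the `m` flag, ^ and $ anchor to the whole string, so only a leading
// first line and a trailing final-line sign-off are stripped.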
async synthesizeLearning(criteria, checkResult) {
const learningPrompt = `You are a context synthesizer. Your task is to analyze test results and create clear, actionable guidance for the next attempt.
Format your response EXACTLY like this:
REFINED_CONTEXT:
GOAL: [One clear sentence stating the objective]
SPECIFICATIONS:
• [Key requirements from original criteria]
• [Additional specifications from original criteria]
LEARNED_CONSTRAINTS:
• AVOID:
- [Specific pattern that failed]
- [Another pattern that failed]
- [Additional patterns to avoid]
• PREFER:
- [Pattern or approach that worked better]
- [Another successful pattern]
- [Additional recommended approaches]
EXAMPLES:
✗ FAILED: "[concrete example from test results]"
WHY: [Clear explanation of why this failed]
✓ WORKED: "[concrete example or hypothetical based on learnings]"
WHY: [Clear explanation of why this works]
EXECUTION_GUIDANCE:
1. [Specific, actionable step based on learnings]
2. [Another concrete step or technique to use]
3. [Final guidance point with clear direction]
Remember: Each section must be filled with specific, concrete information from the test results.`;
try {
const learning = await this.makeOpenAIRequest([
{
role: "system",
content: learningPrompt
},
{
role: "user",
content: `ORIGINAL CRITERIA:
${criteria}
TEST RESULTS:
${checkResult}
Based on these test results:
1. Extract specific patterns that failed
2. Identify any approaches that showed promise
3. Create concrete, actionable guidance for the next attempt
4. Include real examples from the test results
5. Provide clear, step-by-step execution guidance
Synthesize this into a refined context that will guide the next attempt.`
}
], "gpt-3.5-turbo-0125");
// Verify that all sections are present and filled
const requiredSections = ['GOAL:', 'SPECIFICATIONS:', 'LEARNED_CONSTRAINTS:', 'EXAMPLES:', 'EXECUTION_GUIDANCE:'];
const missingOrEmpty = requiredSections.filter(section =>
!learning.includes(section) ||
learning.split(section)[1].trim().length < 10
);
if (missingOrEmpty.length > 0) {
console.warn('Missing or empty sections:', missingOrEmpty);
// Retry with more explicit instructions for missing sections
return this.makeOpenAIRequest([
{
role: "system",
content: learningPrompt
},
{
role: "user",
content: `${learning}\n\nThe above response is missing or has empty sections: ${missingOrEmpty.join(', ')}. Please provide a complete response with all sections filled in with specific, concrete information.`
}
], "gpt-3.5-turbo-0125");
}
return learning;
} catch (error) {
console.error('Learning Synthesis Error:', error);
return 'Error during learning synthesis.';
}
}
/**
* Grades the assistant's output against the grading criteria.
* @param {string} conversationSummary
* @param {string} assistantOutput
* @returns {string} - The grading feedback.
*/
async gradeResponse(conversationSummary, assistantOutput) {
const graderPersona = this.personas.find(p => p.role === "response_grader");
if (!graderPersona) {
console.error('Response Grader persona not found.');
return 'Grading functionality is not available.';
}
try {
const gradingResponse = await this.makeOpenAIRequest([
{ role: "system", content: graderPersona.systemPrompt },
{ role: "user", content: `GRADING_CRITERIA and CHECKER_EQUATIONS:\n${this.extractGradingCriteria(conversationSummary)}\n\nAI Assistant's Output:\n${assistantOutput}` }
], graderPersona.model);
return gradingResponse;
} catch (error) {
console.error('Grading Error:', error);
return 'An error occurred while grading the response.';
}
}
/**
* Extracts the grading criteria from the conversation summary.
* @param {string} conversationSummary
* @returns {string} - The grading criteria and checker equations.
*/
extractGradingCriteria(conversationSummary) {
// Use regex to extract GRADING_CRITERIA and CHECKER_EQUATIONS from the summary
const gradingMatch = conversationSummary.match(/GRADING_CRITERIA:\n([\s\S]+?)\n\nCHECKER_EQUATIONS:/);
const checkerMatch = conversationSummary.match(/CHECKER_EQUATIONS:\n([\s\S]+?)\n\n/);
let gradingCriteria = '';
if (gradingMatch && gradingMatch[1]) {
gradingCriteria = gradingMatch[1].trim();
}
let checkerEquations = '';
if (checkerMatch && checkerMatch[1]) {
checkerEquations = checkerMatch[1].trim();
}
return `GRADING_CRITERIA:\n${gradingCriteria}\n\nCHECKER_EQUATIONS:\n${checkerEquations}`;
}
/**
* Formats the conversation summary for evaluation.
* @param {string} originalQuery
* @param {Array} conversations
* @returns {string}
*/
formatConversationSummary(originalQuery, conversations) {
let summary = `Original Question: ${originalQuery}\n\n`;
conversations.forEach(conv => {
summary += `=== ${conv.role.toUpperCase()} PERSPECTIVE ===\n`;
conv.exchanges.forEach(exchange => {
summary += `${exchange.type === 'persona' ? 'Question' : 'Response'}: ${exchange.content}\n`;
});
summary += '\n';
});
return summary;
}
/**
* Updates the original query with the provided consideration.
* @param {string} baseQuery
* @param {string} consideration
* @returns {string}
*/
updateQueryWithConsideration(baseQuery, consideration) {
// Extract key requirements from the consideration
const requiresLiteralInterpretation = consideration.includes('LITERAL_CONSTRAINT');
if (requiresLiteralInterpretation) {
// Rephrase the query to be more explicit about literal requirements
return this.rephraseWithLiteralRequirements(baseQuery);
}
// Add other cases as needed
return baseQuery;
}
rephraseWithLiteralRequirements(query) {
// Add logic to rephrase different types of queries
// This is just an example for the Bible words case