From 9ae51b022e4d03f8641af94f2a80145ca425a045 Mon Sep 17 00:00:00 2001
From: WalkThroughTheDoorAndDoTheDinosaur <contact@rmusser.net>
Date: Tue, 30 Apr 2024 21:08:56 -0700
Subject: [PATCH] cleanup main folder

---
 tldw-scripts/chunker.py           |  61 +++++++++++++
 tldw-scripts/compare-app.py       |  57 ++++++++++++
 tldw-scripts/compare.py           |  69 ++++++++++++++
 tldw-scripts/merger.py            |  29 ++++++
 tldw-scripts/pyannote.py          |  10 +++
 tldw-scripts/roller-chatgpt-v2.py |  86 ++++++++++++++++++
 tldw-scripts/roller-chatgpt.py    | 114 +++++++++++++++++++++++
 tldw-scripts/roller-exllama.py    | 127 ++++++++++++++++++++++++++
 tldw-scripts/roller-vllm.py       | 145 ++++++++++++++++++++++++++++++
 9 files changed, 698 insertions(+)
 create mode 100644 tldw-scripts/chunker.py
 create mode 100644 tldw-scripts/compare-app.py
 create mode 100644 tldw-scripts/compare.py
 create mode 100644 tldw-scripts/merger.py
 create mode 100644 tldw-scripts/pyannote.py
 create mode 100644 tldw-scripts/roller-chatgpt-v2.py
 create mode 100644 tldw-scripts/roller-chatgpt.py
 create mode 100644 tldw-scripts/roller-exllama.py
 create mode 100644 tldw-scripts/roller-vllm.py

diff --git a/tldw-scripts/chunker.py b/tldw-scripts/chunker.py
new file mode 100644
index 000000000..9980c3e0e
--- /dev/null
+++ b/tldw-scripts/chunker.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+import string
+import json
+from transformers import AutoTokenizer
+tokenizer = AutoTokenizer.from_pretrained('hf-internal-testing/llama-tokenizer', use_fast = True)
+
+def segment_merger(filename, max_text_len = 1000):
+    """Yield merged {'speaker','text','start','end'} turns from the diarized segment list in `filename`.
+
+    Consecutive segments of one speaker are joined; once a turn exceeds `max_text_len`
+    characters (0 disables) it is cut at the next segment following sentence punctuation.
+    """
+    with open(filename) as f:  # close the handle instead of leaking it
+        segments = json.load(f)
+    stop_chars = string.punctuation.replace(',','')
+    text, start_time, last_segment = '', None, { 'speaker': None }
+    for segment in segments:
+        early_break = (max_text_len > 0) and (len(text) > max_text_len) and (text[-1] in stop_chars)
+        # an empty buffer always starts a new turn, so a turn can never be emitted with start=None
+        if last_segment['speaker'] != segment['speaker'] or early_break or text == '':
+            if text != '':
+                yield { 'speaker': last_segment['speaker'], 'text': text, 'start': start_time, 'end': last_segment['end'] }
+            text, start_time = segment['text'].lstrip(), segment['start']
+        else: text += segment['text']
+        last_segment = segment
+    if text != '':
+        yield { 'speaker': last_segment['speaker'], 'text': text, 'start': start_time, 'end': last_segment['end'] }
+
+def time_splitter(merged_segments, chunk_size = 300):
+    """Group merged turns into chunks spanning at least `chunk_size` seconds; yields text/start/end/speakers dicts."""
+    start_time, text, speakers = None, '', []
+    end_time = None
+    for segment in merged_segments:
+        if start_time is None:
+            start_time = segment['start']
+        if segment['speaker'] not in speakers: speakers.append(segment['speaker'])
+        text += f"{segment['speaker']}: {segment['text']}\n"
+        end_time = segment['end']
+        if end_time - start_time >= chunk_size:
+            yield { 'text': text, 'start': start_time, 'end': end_time, 'speakers': speakers }
+            start_time, text, speakers = None, '', []
+    if text != '':  # flush the trailing partial chunk, which was previously dropped
+        yield { 'text': text, 'start': start_time, 'end': end_time, 'speakers': speakers }
+
+def main(prefix: str, chunk_size: int = 300, max_text_len: int = 800):
+    """Chunk `prefix`.diarize.json into `prefix`.chunk.json and print per-chunk token statistics."""
+    turns = list(segment_merger(prefix+'.diarize.json', max_text_len))
+    chunks = list(time_splitter(turns, chunk_size))
+    with open(prefix+'.chunk.json', 'w') as out:
+        json.dump(chunks, out)
+    largest = 0
+    for number, piece in enumerate(chunks):
+        token_count = len(tokenizer.encode(piece['text']))
+        largest = max(largest, token_count)
+        print(f"Segment {number}: {token_count} tokens, {len(piece['text'])} characters, {int(piece['end']-piece['start'])} seconds")
+    print(f"Largest chunk was {largest} tokens")
+    print(f"Wrote {len(chunks)} chunks to {prefix}.chunk.json")
+
+if __name__ == "__main__":
+    import fire
+    fire.Fire(main)
\ No newline at end of file
diff --git a/tldw-scripts/compare-app.py b/tldw-scripts/compare-app.py
new file mode 100644
index 000000000..3cc4f8b50
--- /dev/null
+++ b/tldw-scripts/compare-app.py
@@ -0,0 +1,57 @@
+import json
+import streamlit as st
+import glob
+
+def load_analysis_file(file_path):
+    """Return the parsed JSON content of `file_path`."""
+    with open(file_path, 'r') as handle:
+        return json.load(handle)
+
+def display_analysis_data(data):
+    """Render one comparison file: a header column per model, then each test's answers side by side."""
+    tests, model_entries = data['tests'], data['models']
+    by_id = {entry['id']: entry for entry in model_entries}
+
+    # header row with one column per model
+    header_cols = st.columns(len(model_entries))
+    for entry in by_id.values():
+        with header_cols[entry['idx']]:
+            st.subheader(f"{entry['short_name']}")
+
+    def colourise(raw, colour):
+        # wrap every non-blank line in a streamlit colour directive
+        return '\n\n'.join([f":{colour}[{x}]" for x in raw.split('\n') if x.strip() != ''])
+
+    for test_name, test_data in tests.items():
+        st.markdown(f"#### {test_name}")
+        answer_cols = st.columns(len(by_id))
+        if 'summary' in test_data:
+            st.markdown("**Analysis**: "+test_data['summary'])
+
+        for model_id, outcome in test_data['results'].items():
+            entry = by_id[model_id]
+            # the coloured lists are stored back on the result but are not displayed
+            outcome['passing_tests'] = colourise(outcome['passing_tests'], 'blue')
+            outcome['failing_tests'] = colourise(outcome['failing_tests'], 'red')
+            with answer_cols[entry['idx']]:
+                # only the raw answer is written into the model's column
+                st.write(outcome['answer'])
+                    
+st.set_page_config(page_title='Analysis Explorer', layout="wide")
+st.markdown("""
+        <style>
+            .block-container {
+                    padding-top: 2rem;
+                    padding-bottom: 0rem;
+                    padding-left: 3rem;
+                    padding-right: 3.5rem;
+                }
+        </style>
+        """, unsafe_allow_html=True)
+
+files = sorted(glob.glob('compare/*.json'))
+data = [load_analysis_file(file) for file in files]  # reuse the helper so every handle is closed
+titles = [x['config']['title'] for x in data]
+options = st.selectbox('Select Summary', titles)
+if options is None: st.warning('No comparison files found in compare/')  # selectbox yields None without options
+else: display_analysis_data(data[titles.index(options)])
diff --git a/tldw-scripts/compare.py b/tldw-scripts/compare.py
new file mode 100644
index 000000000..1eba9bacf
--- /dev/null
+++ b/tldw-scripts/compare.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+import json
+import os
+from jinja2 import Template
+import fire
+import yaml
+from copy import copy
+
+def prepare(TEST_LANGUAGE, path, files):
+    """Collect per-model answers for every test recorded in `TEST_LANGUAGE`.
+
+    `files` is a list of dicts with 'eval' (ndjson file name under `path`), 'id' and
+    'short_name'.  The eval file name must hold at least seven '_'-separated tags; tags
+    3, 5 and 6 are taken as prompt, params and model.  Returns {'tests': ..., 'models': ...}
+    where tests maps '<name>-<language>' to the per-model results keyed by model id.
+    """
+    out = {}
+    models = []
+
+    for idx, info in enumerate(files):
+        file = os.path.join(path, info['eval'])
+        model_id = info['id']  # renamed from `id` so the builtin is not shadowed
+
+        # tags are the '_'-separated pieces of the file name (without the .ndjson suffix)
+        tags = os.path.basename(file).replace('.ndjson', '').split('_')
+        models.append({'prompt': tags[3], 'short_name': info['short_name'], 'params': tags[5], 'model': tags[6],
+                       'id': model_id, 'idx': idx, 'passed': 0, 'total': 0})
+
+        with open(file) as ndjson:  # close the eval file; blank lines are skipped instead of crashing
+            results = [json.loads(line) for line in ndjson if line.strip()]
+
+        for r in results:
+            if r['language'] != TEST_LANGUAGE:
+                continue
+
+            testid = r['name']+'-'+r['language']
+            if testid not in out:
+                out[testid] = { 'results': {}, 'task': '', 'language': r['language'] }
+
+            # check_summary / passing_tests / failing_tests are placeholders, always empty here
+            out[testid]['results'][model_id] = {
+                'check_summary': '',
+                'passing_tests': '',
+                'failing_tests': '',
+                'answer': r['answer']
+            }
+
+    return { 'tests': out, 'models': models }
+
+def main(config: str, path: str = "./", analyser: str = "", language: str = "english"):
+    """Write a `<config stem>-<lang>.json` comparison for each comma-separated entry of `language`.
+    NOTE(review): `analysis` is not defined in this file, so a non-empty `analyser` raises NameError."""
+    with open(config) as cfg_file:  # close the config instead of leaking the handle
+        cfg = yaml.safe_load(cfg_file)
+    # splitext swaps any extension; str.replace('.yaml', ...) left e.g. *.yml unchanged and overwrote the config
+    stem = os.path.splitext(config)[0]
+    for lang in language.split(','):
+        cfg['language'] = lang
+        print('Comparing results for', lang)
+        data = prepare(cfg['language'], path, cfg['models'])
+        data['config'] = copy(cfg)
+        data['config']['title'] += f" ({lang})"
+        data['analyser'] = analyser
+        if analyser != "": analysis(data, analyser)
+        with open(f'{stem}-{lang}.json', 'w') as f:
+            json.dump(data, f, indent=4)
+
+if __name__ == "__main__":
+    fire.Fire(main)
diff --git a/tldw-scripts/merger.py b/tldw-scripts/merger.py
new file mode 100644
index 000000000..97519f265
--- /dev/null
+++ b/tldw-scripts/merger.py
@@ -0,0 +1,29 @@
+import json
+import sys
+
+in_file = sys.argv[1]
+with open(in_file) as infile:
+    chunks = [json.loads(line) for line in infile.readlines()]
+
+def part_to_time(part):
+    """Return the offset of five-minute part number `part` as a zero-padded 'HH:MM' string."""
+    # every part is five minutes long
+    hours, minutes = divmod(part*5, 60)
+    return f'{hours:02}:{minutes:02}'
+
+# Join the per-chunk answers into one numbered document.
+text = ''.join(f'\nSection {idx+1}: {chunk["answer"]}\n' for idx, chunk in enumerate(chunks))
+
+# Swap only a trailing 'ndjson' for 'txt'; str.replace rewrote every occurrence in the path and,
+# when there was none, returned the input path so the source file itself got overwritten.
+out_file = (in_file[:-len('ndjson')] if in_file.endswith('ndjson') else in_file + '.') + 'txt'
+with open(out_file,'w') as outfile:
+    outfile.write(text)
+
+from transformers import AutoTokenizer
+tokenizer = AutoTokenizer.from_pretrained('hf-internal-testing/llama-tokenizer', use_fast = True)
+logits = tokenizer.encode(text)
+
+print('chunks:', len(chunks))
+print('summary bytes:', len(text))
+print('summary tokens:', len(logits))
\ No newline at end of file
diff --git a/tldw-scripts/pyannote.py b/tldw-scripts/pyannote.py
new file mode 100644
index 000000000..fbeba2b05
--- /dev/null
+++ b/tldw-scripts/pyannote.py
@@ -0,0 +1,10 @@
+from pyannote.audio import Pipeline
+import torch
+pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization").to(torch.device("cuda"))
+
+# apply the pretrained pipeline to the audio file, requesting exactly two speakers
+diarization = pipeline("lex.wav", num_speakers=2)
+
+# print the start/stop time and speaker label of every diarized turn
+for turn, _, speaker in diarization.itertracks(yield_label=True):
+    print(f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker_{speaker}")
diff --git a/tldw-scripts/roller-chatgpt-v2.py b/tldw-scripts/roller-chatgpt-v2.py
new file mode 100644
index 000000000..1dcde54ff
--- /dev/null
+++ b/tldw-scripts/roller-chatgpt-v2.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+from jinja2 import Template
+import json
+
+prompt_template = """
+Continue the rolling transcription summary of "{{title}}".  Consider the current context when summarizing the given transcription part.
+
+### Context: {{ context }}
+Speaker-Map: {{ speakermap }}
+
+### Transcription part {{ idx }} of {{ len }}, start time {{ start }}:
+{{ chunk }}
+
+### Instruction: Using the Context above, analyze the Trasncription and respond with a JSON object in this form:
+
+{
+    "Speaker-Map": { "SPEAKER 1": "Bob Dole", "SPEAKER 2": "Jane Doe" } // A map of speakers to their names, make sure to remember all previous speakers.
+    "Next-Context": "..." // An updated context for the next part of the transcription. Always include the speakers and the current topics of discussion.
+    "Summary": "..." // A detailed, point-by-point summary of the current transcription.
+}
+"""
+
+from openai import OpenAI
+
+client = OpenAI()
+
+def main(prefix: str, init_speakers: str = ""):
+    """Produce a rolling summary of `prefix`.chunk.json with the OpenAI chat API.
+
+    Reads `prefix`.chunk.json and `prefix`.info.json ('title' and 'description' keys are
+    used) and writes one JSON object per chunk to `prefix`.summary.json.  Each reply must
+    be a JSON object with non-empty 'Summary', 'Speaker-Map' and 'Next-Context' entries;
+    the latter two are fed into the next chunk's prompt.  The process exits with status 1
+    on the first reply that lacks one of them.  `init_speakers` is currently unused.
+    """
+    the_template = Template(prompt_template)
+
+    # read both inputs through context managers so the handles are closed
+    with open(prefix+'.chunk.json') as chunk_file:
+        split_segments = json.load(chunk_file)
+    with open(prefix+'.info.json') as info_file:
+        info = json.load(info_file)
+
+    context = f"""
+    Video Title: {info['title']}
+    Video Description: {info['description'][:1024]}
+    """
+    speakers = "{ UNKNOWN }"
+
+    # the output file is closed (and therefore fully flushed) even on the early exit below
+    with open(prefix+'.summary.json', 'w') as f:
+        for idx, chunk in enumerate(split_segments):
+            dur = chunk['end'] - chunk['start']
+            # report the text length like the other rollers; len(chunk) only counted the dict keys
+            print(f"{idx}: {dur}s {len(chunk['text'])}")
+
+            prompt = the_template.render(chunk=chunk['text'], start=chunk['start'], end=chunk['end'],
+                                         idx=idx, len=len(split_segments), context=context, speakermap=speakers, title=info['title'])
+
+            messages = [{'role': 'user', 'content': prompt }]
+            response = client.chat.completions.create(messages=messages,model='gpt-3.5-turbo-1106',temperature=0.1,max_tokens=1024, response_format={ "type": "json_object" })
+            parsed = json.loads(response.choices[0].message.content)
+
+            summary = parsed.get('Summary','')
+            new_speakers = parsed.get('Speaker-Map','')
+            new_context = parsed.get('Next-Context','')
+            if summary == '' or new_context == '' or new_speakers == '':
+                print('extraction failed:', new_context, new_speakers, summary)
+                exit(1)
+
+            section = { 'start': chunk['start'], 'end': chunk['end'], 'summary': summary,
+                        'speakers': new_speakers, 'context': new_context }
+            print('## ', new_speakers)
+            print('>> ', new_context)
+            print(summary)
+            print()
+
+            f.write(json.dumps(section)+'\n')
+            f.flush()
+
+            context = new_context
+            speakers = new_speakers
+
+if __name__ == "__main__":
+    import fire
+    fire.Fire(main)
\ No newline at end of file
diff --git a/tldw-scripts/roller-chatgpt.py b/tldw-scripts/roller-chatgpt.py
new file mode 100644
index 000000000..410b58d93
--- /dev/null
+++ b/tldw-scripts/roller-chatgpt.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+from jinja2 import Template
+import json
+
+prompt_template = """
+Continue the rolling transcription summary of "{{title}}".  Consider the current context when summarizing the given transcription part.
+
+### Context: {{ context }}
+Speaker-Map: {{ speakermap }}
+
+### Transcription part {{ idx }} of {{ len }}, start time {{ start }}:
+{{ chunk }}
+
+### Instruction: Structure your reply with a two element list in the following format:
+
+- Speaker-Map: A map of speakers to their names, for example { "SPEAKER 1": "Bob Dole", "SPEAKER 2": "Jane Doe" }
+- Next-Context: An updated context for the next part of the transcription. Always include the speakers and the current topics of discussion.
+- Summary: A detailed, point-by-point summary of the current transcription.
+
+"""
+
+from langchain.chat_models import ChatOpenAI
+from langchain import LLMChain, PromptTemplate
+params = {
+    "temperature": 0.7,
+    "presence_penalty": 1.176,
+    "top_p": 0.1,
+    "max_tokens": 1024
+}
+model = ChatOpenAI(model_name='gpt-3.5-turbo', **params)
+chain = LLMChain(llm=model, prompt=PromptTemplate(template='{input}', input_variables=['input']))
+
+def main(prefix: str, init_speakers: str = ""):
+    """Produce a rolling summary of `prefix`.chunk.json through the langchain `chain`.
+
+    Reads `prefix`.chunk.json and `prefix`.info.json ('title' and 'description' keys are
+    used) and writes one JSON object per chunk to `prefix`.summary.json.  The reply is
+    scanned line by line for 'Speaker-Map:', 'Next-Context:' and 'Summary:' markers; lines
+    without a marker continue the most recent field.  The process exits with status 1 on
+    the first reply in which any field is empty.  `init_speakers` is currently unused.
+    """
+    the_template = Template(prompt_template)
+
+    # read both inputs through context managers so the handles are closed
+    with open(prefix+'.chunk.json') as chunk_file:
+        split_segments = json.load(chunk_file)
+    with open(prefix+'.info.json') as info_file:
+        info = json.load(info_file)
+
+    context = f"""
+    SPEAKER 1: Not yet known
+    SPEAKER 2: Not yet known
+    Video Title: {info['title']}
+    Video Description: {info['description'][:1024]}
+    """
+
+    speakers = "{ UNKNOWN }"
+    # markers in the precedence the original if/elif chain checked them
+    markers = (('Next-Context:', 'context'), ('Summary:', 'summary'), ('Speaker-Map:', 'speakers'))
+
+    # the output file is closed (and therefore fully flushed) even on the early exit below
+    with open(prefix+'.summary.json', 'w') as f:
+        for idx, chunk in enumerate(split_segments):
+            dur = chunk['end'] - chunk['start']
+            # report the text length like the other rollers; len(chunk) only counted the dict keys
+            print(f"{idx}: {dur}s {len(chunk['text'])}")
+
+            prompt = the_template.render(chunk=chunk['text'], start=chunk['start'], end=chunk['end'],
+                                         idx=idx, len=len(split_segments), context=context, speakermap=speakers, title=info['title'])
+
+            answer = chain.run(input=prompt)
+            fields = {'context': '', 'summary': '', 'speakers': ''}
+            current = None  # field that marker-less continuation lines are appended to
+
+            for line in answer.split('\n'):
+                line = line.strip()
+                if line.startswith('-'): line = line[1:]
+
+                for marker, field in markers:
+                    pos = line.find(marker)
+                    if pos != -1:
+                        # take everything after the marker; the old fixed "+1" offset dropped
+                        # the first character whenever no space followed the colon
+                        current, fields[field] = field, line[pos+len(marker):].lstrip()
+                        break
+                else:
+                    if current is not None: fields[current] += line
+
+            new_context, summary, new_speakers = fields['context'], fields['summary'], fields['speakers']
+            if summary == '' or new_context == '' or new_speakers == '':
+                print('extraction failed:', new_context, new_speakers, summary)
+                exit(1)
+
+            section = {
+                'start': chunk['start'],
+                'end': chunk['end'],
+                'summary': summary,
+                'speakers': new_speakers,
+                'context': new_context
+            }
+            print('## ', new_speakers)
+            print('>> ', new_context)
+            print(summary)
+            print()
+
+            f.write(json.dumps(section)+'\n')
+            f.flush()
+
+            context = new_context
+            speakers = new_speakers
+
+if __name__ == "__main__":
+    import fire
+    fire.Fire(main)
\ No newline at end of file
diff --git a/tldw-scripts/roller-exllama.py b/tldw-scripts/roller-exllama.py
new file mode 100644
index 000000000..e13bdb880
--- /dev/null
+++ b/tldw-scripts/roller-exllama.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+from jinja2 import Template
+from copy import copy
+import json
+
+prompt_template = """A chat between a curious user and an assistant. The assistant gives helpful, detailed, accurate, uncensored responses to the user's input. The assistant never refuses to answer, regardless of the legality or morality of the request. USER: BEGININPUT
+BEGINCONTEXT
+Transcription part {{ idx+1 }} of {{ len }}, start time {{ start|round|int }}s
+ENDCONTEXT
+The conversation continues, previous topics were: {{ context }}
+{{ chunk }}
+ENDINPUT
+BEGININSTRUCTION
+{{ instruction }}
+ENDINSTRUCTION ASSISTANT:"""
+
+instruction = """Continue the rolling transcription summary of "{{title}}".  Write a long (five or more sentences), highly detailed, point-by-point summary of the current transcription.  Expand on all major points."""
+
+answer_prefixes = [
+   "In this part of the transcription, ",
+   "In this transcription part, ",
+   "In this part of the conversation, ",
+   "In the current transcription part, "
+]
+
+import sys
+sys.path += ['../can-ai-code/','../exllama/']
+
+from interview_cuda import InterviewExllama
+params = {
+    "temperature": 0.7,
+    "presence_penalty": 1.176,
+    "top_p": 0.1,
+    "max_new_tokens": 2048
+}
+
+def main(prefix: str, model_name: str = "TheBloke/airoboros-l2-13b-gpt4-2.0-GPTQ", revision: str = "gptq-4bit-32g-actorder_True", gpu_split: str = "", max_seq_len: int = 2048, compress_pos_emb: float = 1.0):
+    """Produce a rolling summary of `prefix`.chunk.json with a local exllama model.
+
+    First asks the model to name every speaker label found in the chunks (exiting with
+    status 1 if one stays unidentified), then summarises each chunk with the labels replaced
+    by names, using the topics of the previous summary as context.  Writes one JSON object
+    per chunk to `prefix`.summary.json and the prompt/answer pairs to `prefix`.prompts.json.
+    """
+    model = InterviewExllama(model_name,{'max_seq_len':max_seq_len, 'compress_pos_emb':compress_pos_emb, 'revision': None if revision == '' else revision}, gpu_split=gpu_split if gpu_split else None)
+    model.load()
+
+    the_template = Template(prompt_template)
+    # read both inputs through context managers so the handles are closed
+    with open(prefix+'.chunk.json') as chunk_file:
+        split_segments = json.load(chunk_file)
+    with open(prefix+'.info.json') as info_file:
+        info = json.load(info_file)
+
+    speaker_map = {}
+    for chunk in split_segments:
+        do_find_speakers = False
+
+        for speaker in chunk['speakers']:
+            if speaker_map.get(speaker, None) is None:
+                speaker_map[speaker] = '??'
+                do_find_speakers = True
+
+        if do_find_speakers:
+            desc = info['description'][:500]
+            speaker_prompts = f"Title: {info['title']}\nDescription: {desc}\nTranscript:\n---\n{chunk['text']}\n---\n"
+            speaker_prompts += f"Identify the names of each SPEAKER from the {info['title']} transcript above\n"
+
+            answer, model_info = model.generate(speaker_prompts, params)
+            print(answer)
+
+            for line in answer.strip().split('\n'):
+                for speaker, name in speaker_map.items():
+                    if name == '??' and (speaker in line):
+                        found_name = line.split(speaker)[1]
+                        # startswith() is safe when nothing follows the label (indexing [0] raised
+                        # IndexError); an empty name is ignored so the speaker stays unidentified
+                        if found_name.startswith(':'): found_name = found_name[1:]
+                        if found_name.strip(): speaker_map[speaker] = found_name.strip()
+
+            for speaker, name in speaker_map.items():
+                if name == '??':
+                    print('Failed to identify', speaker)
+                    exit(1)
+                else:
+                    print(speaker,'=>',name)
+
+    context = f"""Video Title: "{info['title']}"
+    Video Description: "{info['description'][:512]}"
+    """
+
+    # both output files are closed (and fully flushed) when the loop ends
+    with open(prefix+'.summary.json', 'w') as f, open(prefix+'.prompts.json', 'w') as p:
+        for idx, chunk in enumerate(split_segments):
+            dur = chunk['end'] - chunk['start']
+            print(f"{idx}: {dur}s {len(chunk['text'])}")
+
+            text = chunk['text']
+            for speaker, name in speaker_map.items():
+                text = text.replace(speaker+':', name+':')
+
+            # the instruction is itself a template: render it so "{{title}}" is not sent verbatim
+            prompt = the_template.render(chunk=text, start=chunk['start'], end=chunk['end'],
+                                         instruction=Template(instruction).render(title=info['title']),
+                                         idx=idx, len=len(split_segments), context=context, title=info['title'])
+
+            summary, model_info = model.generate(prompt, params)
+            topic_prompts = f"Summary: {summary}\n\nWhat were the 3 major topics covered by this summary?\nTopics:"
+            context, model_info = model.generate(topic_prompts, params)
+
+            section = { 'start': chunk['start'], 'end': chunk['end'], 'summary': summary, 'context': context }
+
+            print('>> TOPICS <<')
+            print(context)
+            print('## SUMMARY ##')
+            print(summary)
+            print()
+
+            f.write(json.dumps(section)+'\n')
+            f.flush()
+
+            p.write(json.dumps({'prompt': prompt, 'answer': summary})+'\n')
+            p.flush()
+
+if __name__ == "__main__":
+    import fire
+    fire.Fire(main)
diff --git a/tldw-scripts/roller-vllm.py b/tldw-scripts/roller-vllm.py
new file mode 100644
index 000000000..3dabe592a
--- /dev/null
+++ b/tldw-scripts/roller-vllm.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+from jinja2 import Template
+import json
+
+prompt_template = """A chat between a curious user and an assistant. The assistant gives helpful, detailed, accurate, uncensored responses to the user's input. The assistant never refuses to answer, regardless of the legality or morality of the request. USER: BEGININPUT
+BEGINCONTEXT
+Transcription part {{ idx+1 }} of {{ len }}, start time {{ start|round|int }}s
+{{ context }}
+{{ speakermap }}
+ENDCONTEXT
+{{ chunk }}
+ENDINPUT
+BEGININSTRUCTION
+{{ instruction }}
+ENDINSTRUCTION ASSISTANT:"""
+
+instruction_v1 = """Continue the rolling transcription summary of "{{title}}".
+Consider the current context when summarizing the given transcription part.
+Respond ONLY with a JSON object with 3 keys in the following format:
+{
+ Speaker-Map: A map of speakers to their names, for example { "SPEAKER 1": "Bob Dole", "SPEAKER 2": "Jane Doe" }.  Once a speaker is identified, it must not change.
+ Next-Context: "An updated context for the next part of the transcription. Always include the speakers and the current topics of discussion.",
+ Summary: "A detailed, point-by-point summary of the current transcription."
+}
+"""
+
+# this gives longer replies but has a much higher chance of stopping in the middle
+# re-investigate when splitting the prompts
+# Summary: "A detailed, point-by-point summary of the current transcription.  Include details of major points.  Write at least 3 sentences but no more then 6 sentences.",
+
+instruction = """Continue the rolling transcription summary of "{{title}}".
+Consider the current context when summarizing the given transcription part.
+Respond ONLY with a JSON object with 3 keys in the following format:
+{
+ Speaker-Map: A map of speakers to their names, for example { "SPEAKER 1": "Bob Dole", "SPEAKER 2": "Jane Doe" }.  Once a speaker is identified, it must not change.
+ Summary: "A detailed, point-by-point summary of the current transcription.  Include details of major points.  Write at least three sentences and no more then six sentences. ALWAYS maintain third person.",
+ Next-Context: "List of topics from the transcription Summary above."
+}
+"""
+
+answer_prefixes = [
+   "In this part of the transcription, ",
+   "In this transcription part, ",
+   "In this part of the conversation, ",
+   "In the current transcription part, "
+]
+
+import sys
+sys.path.append('../can-ai-code/')
+from interview_cuda import InterviewVLLM
+params = {
+    "temperature": 0.7,
+    "presence_penalty": 1.176,
+    "top_p": 0.1,
+    "max_tokens": 2048
+}
+
+def main(prefix: str, model_name: str, gpu_split: str = "", init_speakers: str = "", max_seq_len: int = 2048, ):
+    """Produce a rolling summary of `prefix`.chunk.json with a local vLLM model.
+
+    Each chunk is summarised with the context and speaker map returned for the previous
+    chunk.  The model must reply with a JSON object holding 'Speaker-Map', 'Summary' and
+    'Next-Context'; the process exits with status 1 on the first reply where one is missing.
+    Writes one JSON object per chunk to `prefix`.summary.json and the prompt/answer pairs to
+    `prefix`.prompts.json.  `init_speakers` and `max_seq_len` are currently unused.
+    """
+    # The original passed json.loads(info) here, but `info` is only assigned further down, so
+    # every run died with UnboundLocalError.  NOTE(review): an empty model-info dict is assumed
+    # to be acceptable to InterviewVLLM -- confirm against can-ai-code.
+    model = InterviewVLLM(model_name, {}, gpu_split=gpu_split if gpu_split else None)
+    model.load()
+
+    the_template = Template(prompt_template)
+    with open(prefix+'.chunk.json') as chunk_file:
+        split_segments = json.load(chunk_file)
+    with open(prefix+'.info.json') as info_file:
+        info = json.load(info_file)
+
+    context = f"""
+    Speakers: [ "UNKNOWN" ]
+    Topics: [ "UNKNOWN" ]
+    Title: "{info['title']}"
+    Description: "{info['description'][:512]}"
+    """
+
+    speakers = "{ UNKNOWN }"
+    # the instruction is itself a template: render it once so "{{title}}" is not sent verbatim
+    rendered_instruction = Template(instruction).render(title=info['title'])
+
+    # both output files are closed (and fully flushed) even on the early exit below
+    with open(prefix+'.summary.json', 'w') as f, open(prefix+'.prompts.json', 'w') as p:
+        for idx, chunk in enumerate(split_segments):
+            dur = chunk['end'] - chunk['start']
+            print(f"{idx}: {dur}s {len(chunk['text'])}")
+
+            prompt = the_template.render(chunk=chunk['text'], start=chunk['start'], end=chunk['end'],
+                                         instruction=rendered_instruction,
+                                         idx=idx, len=len(split_segments), context=context, speakermap=speakers, title=info['title'])
+
+            if model.batch:
+                answers, model_info = model.generate([prompt], params)
+                answer = answers[0]
+            else:
+                answer, model_info = model.generate(prompt, params)
+
+            # the trailing } is sometimes lost; strip first so trailing whitespace does not
+            # cause a second brace to be appended
+            answer = answer.strip()
+            if not answer.endswith('}'): answer += '}'
+            for answer_prefix in answer_prefixes:  # renamed: the loop used to rebind `prefix`
+                answer = answer.replace(answer_prefix, '')
+
+            answer_json = {}
+            try:
+                answer_json = json.loads(answer, strict=False)
+            except ValueError as e:
+                print(answer)
+                print('Error parsing response: ', str(e))
+
+            summary = answer_json.get('Summary','')
+            new_context = str(answer_json.get('Next-Context',''))
+            new_speakers = str(answer_json.get('Speaker-Map',''))
+            if summary == '' or new_context == '' or new_speakers == '':
+                print('extraction failed:', new_context, new_speakers, summary)
+                exit(1)
+
+            section = { 'start': chunk['start'], 'end': chunk['end'], 'summary': summary,
+                        'speakers': new_speakers, 'context': new_context }
+            print('## ', new_speakers)
+            print('>> ', new_context)
+            print(summary)
+            print()
+
+            f.write(json.dumps(section)+'\n')
+            f.flush()
+
+            p.write(json.dumps({'prompt': prompt, 'answer': answer})+'\n')
+            p.flush()
+
+            context = new_context
+            speakers = new_speakers
+
+if __name__ == "__main__":
+    import fire
+    fire.Fire(main)