From a3abd892d4a041eee7e3dfa54ceddcf0d309e3f7 Mon Sep 17 00:00:00 2001
From: crvernon <chrisrvernon@gmail.com>
Date: Wed, 11 Sep 2024 15:54:16 -0400
Subject: [PATCH] move all functions to package

---
 app.py                | 136 ++++++++++------------------------
 highlight/__init__.py |   2 +-
 highlight/prompts.py  |  72 ------------------
 highlight/utils.py    | 165 +++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 202 insertions(+), 173 deletions(-)

diff --git a/app.py b/app.py
index e6d2c2e..f583746 100644
--- a/app.py
+++ b/app.py
@@ -11,89 +11,15 @@
 import highlight as hlt
 
 
-client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-
-
-def generate_content(
-    container,
-    content,
-    prompt_name="title",
-    result_title="Title Result:",
-    max_tokens=50,
-    temperature=0.0,
-    box_height=200,
-    additional_content=None,
-    max_word_count=100,
-    min_word_count=75,
-    max_allowable_tokens: int = 150000,
-    model="gpt-4o"
-):
-    """
-    Generate content using the OpenAI API based on the provided parameters and display it in a Streamlit container.
-
-    Args:
-        container (streamlit.container): The Streamlit container to display the generated content.
-        content (str): The text content to be used for generating the prompt.
-        prompt_name (str, optional): The name of the prompt to use. Defaults to "title".
-        result_title (str, optional): The title to display above the generated content. Defaults to "Title Result:".
-        max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 50.
-        temperature (float, optional): The sampling temperature. Defaults to 0.0.
-        box_height (int, optional): The height of the text area box to display the generated content. Defaults to 200.
-        additional_content (str, optional): Additional content to include in the prompt. Defaults to None.
-        max_word_count (int, optional): The maximum word count for the generated content. Defaults to 100.
-        min_word_count (int, optional): The minimum word count for the generated content. Defaults to 75.
-        max_allowable_tokens (int, optional): The maximum allowable tokens for the content. Defaults to 150000.
-        model (str, optional): The model to use for content generation. Defaults to "gpt-4o".
-
-    Returns:
-        str: The generated content.
-    """
-
-    response = hlt.generate_prompt(
-        client,
-        content=content,
-        prompt_name=prompt_name,
-        temperature=temperature,
-        max_tokens=max_tokens,
-        max_allowable_tokens=max_allowable_tokens,
-        additional_content=additional_content,
-        model=model
-    )
-
-    container.markdown(result_title)
-
-    word_count = len(response.split())
-
-    if word_count > max_word_count:
-
-        # construct word count reduction prompt
-        reduction_prompt = hlt.prompt_queue["reduce_wordcount"].format(min_word_count, max_word_count, response)
-
-        messages = [
-            {"role": "system", "content": hlt.prompt_queue["system"]},
-            {"role": "user", "content": reduction_prompt}
-        ]
-
-        reduced_response = client.chat.completions.create(
-            model=model,
-            max_tokens=max_tokens,
-            temperature=temperature,
-            messages=messages
-        )
-
-        response = reduced_response.choices[0].message.content
-
-    container.text_area(
-        label=result_title,
-        value=response,
-        label_visibility="collapsed",
-        height=box_height
-    )
-
-    st.write(f"Word count:  {len(response.split())}")
-
-    return response
-
+if "client" not in st.session_state:
+    # build the client only when this session has none; an unset or empty key counts as missing
+    key = os.getenv("OPENAI_API_KEY")
+    if not key:
+        raise KeyError(
+            "No key found for 'OPENAI_API_KEY' system variable. "
+            "Obtain your OpenAI API key from the OpenAI website: https://platform.openai.com/api-keys"
+        )
+    st.session_state.client = OpenAI(api_key=key)
 
 if "reduce_document" not in st.session_state:
     st.session_state.reduce_document = False
@@ -186,7 +112,7 @@ def generate_content(
 st.title("Research Highlight Generator")
 
 st.markdown((
-    "This app uses a Large Language Model (LLM) of your choosing to generate ",
+    "This app uses a Large Language Model (LLM) of your choosing to generate"
     " formatted research highlight content from an input file."
 ))
 
@@ -293,7 +219,8 @@ def generate_content(
     # build container content
     if title_container.button('Generate Title'):
 
-        st.session_state.title_response = generate_content(
+        st.session_state.title_response = hlt.generate_content(
+            client=st.session_state.client,
             container=title_container,
             content=content_dict["content"],
             prompt_name="title",
@@ -345,7 +272,8 @@ def generate_content(
             st.write("Please generate a Title first.  Subtitle generation considers the title response.")
         else:
 
-            st.session_state.subtitle_response = generate_content(
+            st.session_state.subtitle_response = hlt.generate_content(
+                client=st.session_state.client,
                 container=subtitle_container,
                 content=content_dict["content"],
                 prompt_name="subtitle",
@@ -405,7 +333,8 @@ def generate_content(
 
     # build container content
     if science_container.button('Generate Science Summary'):
-        st.session_state.science_response = generate_content(
+        st.session_state.science_response = hlt.generate_content(
+            client=st.session_state.client,
             container=science_container,
             content=content_dict["content"],
             prompt_name="science",
@@ -464,7 +393,8 @@ def generate_content(
 
     # build container content
     if impact_container.button('Generate Impact Summary'):
-        st.session_state.impact_response = generate_content(
+        st.session_state.impact_response = hlt.generate_content(
+            client=st.session_state.client,
             container=impact_container,
             content=content_dict["content"],
             prompt_name="impact",
@@ -519,7 +449,8 @@ def generate_content(
 
     # build container content
     if summary_container.button('Generate General Summary'):
-        st.session_state.summary_response = generate_content(
+        st.session_state.summary_response = hlt.generate_content(
+            client=st.session_state.client,
             container=summary_container,
             content=content_dict["content"],
             prompt_name="summary",
@@ -563,7 +494,8 @@ def generate_content(
         if st.session_state.summary_response is None:
             st.write("Please generate a general summary first.")
         else:
-            st.session_state.figure_response = generate_content(
+            st.session_state.figure_response = hlt.generate_content(
+                client=st.session_state.client,
                 container=figure_container,
                 content=st.session_state.summary_response,
                 prompt_name="figure",
@@ -608,7 +540,8 @@ def generate_content(
         if st.session_state.summary_response is None:
             st.write("Please generate a general summary first.")
         else:
-            st.session_state.figure_caption = generate_content(
+            st.session_state.figure_caption = hlt.generate_content(
+                client=st.session_state.client,
                 container=figure_summary_container,
                 content=st.session_state.summary_response,
                 prompt_name="figure_caption",
@@ -635,7 +568,8 @@ def generate_content(
     citation_container.markdown("##### Citation for the paper in Chicago style")
     
     if citation_container.button('Generate Citation'):
-        st.session_state.citation = generate_content(
+        st.session_state.citation = hlt.generate_content(
+            client=st.session_state.client,
             container=citation_container,
             content=content_dict["content"],
             prompt_name="citation",
@@ -661,7 +595,8 @@ def generate_content(
     funding_container.markdown("##### Funding statement from the paper")
     
     if funding_container.button('Generate funding statement'):
-        st.session_state.funding = generate_content(
+        st.session_state.funding = hlt.generate_content(
+            client=st.session_state.client,
             container=funding_container,
             content=content_dict["content"],
             prompt_name="funding",
@@ -717,7 +652,7 @@ def generate_content(
         )
 
     # power point slide content
-    st.markdown("### Content to fill in PowerPoint template template:")
+    st.markdown("### Content to fill in PowerPoint template:")
 
     # objective section
     objective_container = st.container()
@@ -745,7 +680,8 @@ def generate_content(
 
     # build container content
     if objective_container.button('Generate Objective'):
-        st.session_state.objective_response = generate_content(
+        st.session_state.objective_response = hlt.generate_content(
+            client=st.session_state.client,
             container=objective_container,
             content=content_dict["content"],
             prompt_name="objective",
@@ -793,7 +728,8 @@ def generate_content(
 
     # build container content
     if approach_container.button('Generate Approach'):
-        st.session_state.approach_response = generate_content(
+        st.session_state.approach_response = hlt.generate_content(
+            client=st.session_state.client,
             container=approach_container,
             content=content_dict["content"],
             prompt_name="approach",
@@ -842,7 +778,8 @@ def generate_content(
 
     # build container content
     if ppt_impact_container.button('Generate Impact Points'):
-        st.session_state.ppt_impact_response = generate_content(
+        st.session_state.ppt_impact_response = hlt.generate_content(
+            client=st.session_state.client,
             container=ppt_impact_container,
             content=content_dict["content"],
             prompt_name="ppt_impact",
@@ -890,7 +827,8 @@ def generate_content(
 
     # build container content
     if ppt_figure_selection.button('Generate Figure Recommendation'):
-        st.session_state.figure_recommendation = generate_content(
+        st.session_state.figure_recommendation = hlt.generate_content(
+            client=st.session_state.client,
             container=ppt_figure_selection,
             content=content_dict["content"],
             prompt_name="figure_choice",
@@ -916,7 +854,7 @@ def generate_content(
     # ppt_figure_output = st.container()
     # ppt_figure_output.markdown("##### Export PPT slide with new content when ready")
 
-    # highlight_ppt_template = "data/highlight_template.pptx"
+    # highlight_ppt_template = importlib.resources.files('highlight.data').joinpath('highlight_template.pptx')
     # ppt = Presentation(highlight_ppt_template)
 
     # # get target slide
diff --git a/highlight/__init__.py b/highlight/__init__.py
index a68bf84..d83aa03 100644
--- a/highlight/__init__.py
+++ b/highlight/__init__.py
@@ -1,4 +1,4 @@
-from highlight.prompts import prompt_queue, generate_prompt
+from highlight.prompts import prompt_queue
 from highlight.utils import *
 
 
diff --git a/highlight/prompts.py b/highlight/prompts.py
index d3a4d51..e719f28 100644
--- a/highlight/prompts.py
+++ b/highlight/prompts.py
@@ -1,6 +1,3 @@
-from highlight.utils import generate_content
-
-
 # globals
 EXAMPLE_TEXT_ONE = """Multisector Dynamics: Advancing the Science of Complex Adaptive Human-Earth Systems.  The field of MultiSector Dynamics (MSD) explores the dynamics and co-evolutionary pathways of human and Earth systems with a focus on critical goods, services, and amenities delivered to people through interdependent sectors. This commentary lays out core definitions and concepts, identifies MSD science questions in the context of the current state of knowledge, and describes ongoing activities to expand capacities for open science, leverage revolutions in data and computing, and grow and diversify the MSD workforce. Central to our vision is the ambition of advancing the next generation of complex adaptive human-Earth systems science to better address interconnected risks, increase resilience, and improve sustainability. This will require convergent research and the integration of ideas and methods from multiple disciplines. Understanding the tradeoffs, synergies, and complexities that exist in coupled human-Earth systems is particularly important in the context of energy transitions and increased future shocks."""
 EXAMPLE_TEXT_TWO = """The Role of Regional Connections in Planning for Future Power System Operations Under Climate Extremes.  Identifying the sensitivity of future power systems to climate extremes must consider the concurrent effects of changing climate and evolving power systems. We investigated the sensitivity of a Western U.S. power system to isolated and combined heat and drought when it has low (5%) and moderate (31%) variable renewable energy shares, representing historic and future systems. We used an electricity operational model combined with a model of historically extreme drought (for hydropower and freshwater-reliant thermoelectric generators) over the Western U.S. and a synthetic, regionally extreme heat event in Southern California (for thermoelectric generators and electricity load). We found that the drought has the highest impact on summertime production cost (+10% to +12%), while temperature-based deratings have minimal effect (at most +1%). The Southern California heat wave scenario impacting load increases summertime regional net imports to Southern California by 10–14%, while the drought decreases them by 6–12%. Combined heat and drought conditions have a moderate effect on imports to Southern California (−2%) in the historic system and a stronger effect (+8%) in the future system. Southern California dependence on other regions decreases in the summertime with the moderate increase in variable renewable energy (−34% imports), but hourly peak regional imports are maintained under those infrastructure changes. By combining synthetic and historically driven conditions to test two infrastructures, we consolidate the importance of considering compounded heat wave and drought in planning studies and suggest that region-to-region energy transfers during peak periods are key to optimal operations under climate extremes."""
@@ -226,72 +223,3 @@
     RESPONSE:
     """,
 }
-
-
-def generate_prompt(
-    client,
-    content: str,
-    prompt_name: str = "title",
-    max_tokens: int = 50,
-    max_allowable_tokens: int = 150000,
-    temperature: float = 0.0,
-    additional_content: str = None,
-    model: str = "gpt-4"
-) -> str:
-    """
-    Generate a prompt using the provided parameters and the prompt queue.
-
-    Args:
-        client: The OpenAI client to use for generating the prompt.
-        content (str): The main text content to be used in the prompt.
-        prompt_name (str, optional): The name of the prompt to use. Defaults to "title".
-        max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 50.
-        max_allowable_tokens (int, optional): The maximum allowable tokens for the content. Defaults to 150000.
-        temperature (float, optional): The sampling temperature. Defaults to 0.0.
-        additional_content (str, optional): Additional content to include in the prompt. Defaults to None.
-        model (str, optional): The model to use for content generation. Defaults to "gpt-4".
-
-    Returns:
-        str: The generated prompt.
-    """
-
-    if prompt_name in ("objective",):
-        prompt = prompt_queue[prompt_name].format(EXAMPLE_TEXT_ONE, EXAMPLE_TEXT_TWO, content)
-
-    elif prompt_name in ("approach",):
-        if additional_content is None:
-            additional_content = content
-        prompt = prompt_queue[prompt_name].format(EXAMPLE_TEXT_TWO, content, additional_content)
-
-    elif prompt_name in ("subtitle",):
-        if additional_content is None:
-            additional_content = content
-        prompt = prompt_queue[prompt_name].format(content, additional_content)
-
-
-    elif prompt_name in (
-        "figure", 
-        "caption", 
-        "impact", 
-        "summary", 
-        "ppt_impact", 
-        "title", 
-        "science", 
-        "figure_caption", 
-        "figure_choice", 
-        "citation",
-        "funding"
-    ):
-        prompt = prompt_queue[prompt_name].format(content)
-
-    return generate_content(
-        client=client,
-        system_scope=SYSTEM_SCOPE,
-        prompt=prompt,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        max_allowable_tokens=max_allowable_tokens,
-        model=model
-    )
-
-
diff --git a/highlight/utils.py b/highlight/utils.py
index 5dd66a3..4aeb9f9 100644
--- a/highlight/utils.py
+++ b/highlight/utils.py
@@ -1,6 +1,9 @@
 import tiktoken
 from tqdm import tqdm
 from pypdf import PdfReader
+import streamlit as st
+
+import highlight.prompts as prompts
 
 
 def get_token_count(text, model="gpt-4o"):
@@ -138,7 +141,7 @@ def content_reduction(
     return content
 
 
-def generate_content(
+def generate_prompt_content(
     client,
     system_scope,
     prompt,
@@ -189,3 +192,163 @@ def generate_content(
     content = response.choices[0].message.content
 
     return content
+
+
+def generate_content(
+    client,
+    container,
+    content,
+    prompt_name="title",
+    result_title="Title Result:",
+    max_tokens=50,
+    temperature=0.0,
+    box_height=200,
+    additional_content=None,
+    max_word_count=100,
+    min_word_count=75,
+    max_allowable_tokens: int = 150000,
+    model="gpt-4o"
+):
+    """
+    Produce the text for one prompt, shorten it when it runs over the word limit, and show it in Streamlit.
+
+    Args:
+        client: The OpenAI client used for the initial request and for the word count reduction request.
+        container (streamlit.container): The Streamlit container that receives the title and the text area.
+        content (str): The text content to be used for generating the prompt.
+        prompt_name (str, optional): The name of the prompt to use. Defaults to "title".
+        result_title (str, optional): The title to display above the generated content. Defaults to "Title Result:".
+        max_tokens (int, optional): The maximum number of tokens to generate per request. Defaults to 50.
+        temperature (float, optional): The sampling temperature. Defaults to 0.0.
+        box_height (int, optional): The height of the text area box holding the result. Defaults to 200.
+        additional_content (str, optional): Additional content to include in the prompt. Defaults to None.
+        max_word_count (int, optional): Word count above which a reduction request is made. Defaults to 100.
+        min_word_count (int, optional): Minimum word count passed to the reduction prompt. Defaults to 75.
+        max_allowable_tokens (int, optional): The maximum allowable tokens for the content. Defaults to 150000.
+        model (str, optional): The model to use for content generation. Defaults to "gpt-4o".
+
+    Returns:
+        str: The text shown in the text area (the reduced version when a reduction request was made).
+    """
+
+    # initial request for the prompt selected by prompt_name
+    response = generate_prompt(
+        client,
+        content=content,
+        prompt_name=prompt_name,
+        temperature=temperature,
+        max_tokens=max_tokens,
+        max_allowable_tokens=max_allowable_tokens,
+        additional_content=additional_content,
+        model=model
+    )
+
+    container.markdown(result_title)
+
+    # over the limit: send the answer back through the reduce_wordcount prompt
+    if len(response.split()) > max_word_count:
+        shrink_request = prompts.prompt_queue["reduce_wordcount"].format(
+            min_word_count, max_word_count, response
+        )
+
+        chat_messages = [
+            {"role": "system", "content": prompts.prompt_queue["system"]},
+            {"role": "user", "content": shrink_request},
+        ]
+        shrunk = client.chat.completions.create(
+            model=model,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            messages=chat_messages,
+        )
+        response = shrunk.choices[0].message.content
+
+    container.text_area(
+        label=result_title,
+        value=response,
+        label_visibility="collapsed",
+        height=box_height
+    )
+
+    # the count is written with st.write rather than through the container
+    st.write(f"Word count:  {len(response.split())}")
+
+    return response
+
+
+def generate_prompt(
+    client,
+    content: str,
+    prompt_name: str = "title",
+    max_tokens: int = 50,
+    max_allowable_tokens: int = 150000,
+    temperature: float = 0.0,
+    additional_content: str = None,
+    model: str = "gpt-4"
+) -> str:
+    """
+    Fill the prompt template selected by ``prompt_name`` and return the model's response to it.
+
+    Args:
+        client: The OpenAI client passed through to ``generate_prompt_content``.
+        content (str): The main text content to be used in the prompt.
+        prompt_name (str, optional): The name of the prompt to use. Defaults to "title".
+        max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 50.
+        max_allowable_tokens (int, optional): The maximum allowable tokens for the content. Defaults to 150000.
+        temperature (float, optional): The sampling temperature. Defaults to 0.0.
+        additional_content (str, optional): Second text used only by the "approach" and "subtitle" prompts;
+            ``content`` is reused when it is None. Defaults to None.
+        model (str, optional): The model to use for content generation. Defaults to "gpt-4".
+
+    Returns:
+        str: The value returned by ``generate_prompt_content`` for the filled-in prompt.
+
+    Raises:
+        ValueError: If ``prompt_name`` is not one of the names handled below.
+    """
+
+    # each template takes a different set of positional fields
+    if prompt_name in ("objective",):
+        prompt = prompts.prompt_queue[prompt_name].format(prompts.EXAMPLE_TEXT_ONE, prompts.EXAMPLE_TEXT_TWO, content)
+
+    elif prompt_name in ("approach",):
+        # reuse the main content when no second text was supplied
+        if additional_content is None:
+            additional_content = content
+        prompt = prompts.prompt_queue[prompt_name].format(prompts.EXAMPLE_TEXT_TWO, content, additional_content)
+
+    elif prompt_name in ("subtitle",):
+        # reuse the main content when no second text was supplied
+        if additional_content is None:
+            additional_content = content
+        prompt = prompts.prompt_queue[prompt_name].format(content, additional_content)
+
+    elif prompt_name in (
+        "figure",
+        "caption",
+        "impact",
+        "summary",
+        "ppt_impact",
+        "title",
+        "science",
+        "figure_caption",
+        "figure_choice",
+        "citation",
+        "funding",
+    ):
+        # these templates only take the main content
+        prompt = prompts.prompt_queue[prompt_name].format(content)
+
+    else:
+        # without this branch an unknown name would only fail below with an UnboundLocalError on `prompt`
+        raise ValueError(f"Unsupported prompt_name: {prompt_name!r}")
+
+    return generate_prompt_content(
+        client=client,
+        system_scope=prompts.SYSTEM_SCOPE,
+        prompt=prompt,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        max_allowable_tokens=max_allowable_tokens,
+        model=model
+    )