Skip to content

Commit 51ad1d3

Browse files
committed
try Llama3-8b on Ollama on the laptop
Signed-off-by: Jules Damji <[email protected]>
1 parent b823d46 commit 51ad1d3

7 files changed

+108
-22
lines changed

assistants/function_utils.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
warnings.filterwarnings('ignore')
1818
_ = load_dotenv(find_dotenv()) # read local .env file
1919

20-
openai.api_base = os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE"))
21-
openai.api_key = os.getenv("ANYSCALE_API_KEY", os.getenv("OPENAI_API_KEY"))
20+
# TODO: The 'openai.api_base' option isn't read in the client API. You will need to pass it when you instantiate the client, e.g. 'OpenAI(base_url=os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE")))'
21+
# openai.api_base = os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE"))
2222
weather_api_key = os.getenv("WEATHER_API_KEY")
2323
MODEL = os.getenv("MODEL")
2424
print(f"Using MODEL={MODEL}; base={openai.api_base}")
@@ -89,12 +89,12 @@ def create_dalle_image(params,
8989
size="1024x1024",
9090
quality=quality,
9191
n=1)
92-
92+
9393
return response.data[0].url
9494

9595
def get_weather_data(params:Dict[Any, Any]=None,
9696
api_base:str="http://api.weatherstack.com/current") -> Dict[str, str]:
97-
97+
9898
"""
9999
Retrieves weather data from the OpenWeatherMap API.
100100
"""
@@ -108,8 +108,8 @@ def get_weather_data(params:Dict[Any, Any]=None,
108108
warnings.filterwarnings('ignore')
109109
_ = load_dotenv(find_dotenv()) # read local .env file
110110

111-
openai.api_base = os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE"))
112-
openai.api_key = os.getenv("ANYSCALE_API_KEY", os.getenv("OPENAI_API_KEY"))
111+
# TODO: The 'openai.api_base' option isn't read in the client API. You will need to pass it when you instantiate the client, e.g. 'OpenAI(base_url=os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE")))'
112+
# openai.api_base = os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE"))
113113
weather_api_key = os.getenv("WEATHER_API_KEY")
114114
MODEL = os.getenv("MODEL")
115115
print(f"Using MODEL={MODEL}; base={openai.api_base}")
@@ -143,4 +143,4 @@ def get_weather_data(params:Dict[Any, Any]=None,
143143
print(f"Weather data for City: {params['query']}")
144144
print(f"Temperature : {weather_data['current']['temperature']}")
145145
print(f"Weather description : {weather_data['current']['weather_descriptions']}")
146-
146+

assistants/google_search_utils.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def google_search(params:Dict[Any,Any]) -> List[Dict[str, str]]:
1717
query = quote_plus(params["query"])
1818
api_key = params["api_key"]
1919
num_in_page = params["num_results"]
20-
20+
2121
url = f"https://api.apilayer.com/google_search?q={query}"
2222

2323
payload = {}
@@ -41,8 +41,8 @@ def google_search(params:Dict[Any,Any]) -> List[Dict[str, str]]:
4141
warnings.filterwarnings('ignore')
4242
_ = load_dotenv(find_dotenv()) # read local .env file
4343

44-
openai.api_base = os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE"))
45-
openai.api_key = os.getenv("ANYSCALE_API_KEY", os.getenv("OPENAI_API_KEY"))
44+
# TODO: The 'openai.api_base' option isn't read in the client API. You will need to pass it when you instantiate the client, e.g. 'OpenAI(base_url=os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE")))'
45+
# openai.api_base = os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE"))
4646
google_api_key = os.getenv("GOOGLE_API_KEY")
4747
MODEL = os.getenv("MODEL")
4848
print(f"Using MODEL={MODEL}; base={openai.api_base}")

fine-tuning/explore_hf_data.py

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import pandas as pd
2+
from datasets import load_dataset
3+
import uuid
4+
5+
"""
6+
Example code to explore the dataset from the HuggingFace Hub, derived
7+
from the Red Dot Design Award Product Description dataset and convert it into a pandas dataframe.
8+
Code is from the following source:
9+
https://huggingface.co/datasets/xiyuez/red-dot-design-award-product-description
10+
Databricks Book on GenAI. (2021). Fine-tuning a pre-trained model with the Hugging Face library.
11+
"""
12+
13+
if __name__ == "__main__":
14+
15+
#Load the dataset from the HuggingFace Hub
16+
rd_ds = load_dataset("xiyuez/red-dot-design-award-product-description")
17+
pd.set_option('display.max_columns', None)
18+
# convert the dataset into a pandas dataframe
19+
df = pd.DataFrame(rd_ds['train'])
20+
# print column names
21+
print(df.columns)
22+
print("------------------")
23+
24+
# print the first product, category, description, and text
25+
print(f"product:{df['product'][0]}")
26+
print(f"category:{df['category'][0]}")
27+
print(f"description:{df['description'][0]}")
28+
print("------------------")
29+
print(f"text: {df['text'][0]}")
30+
print("------------------")
31+
32+
# Combine the two attributes into an instruction string
33+
df['instruction'] = 'Create a detailed description for the following product: '+ df['product']+', belonging to category: '+ df['category']
34+
35+
df = df[['instruction', 'description']]
36+
# Get a 5000 sample subset for fine-tuning purposes
37+
df_sample = df.sample(n=5000, random_state=42)
38+
print(df_sample.columns)
39+
print("------------------")
40+
print(df_sample.head())
41+
print("------------------")
42+
43+
# Define template and format data into the template
44+
# for supervised fine-tuning
45+
template = """Below is an instruction that describes a task.
46+
Write a response that appropriately completes the
47+
request.
48+
### Instruction:
49+
{}
50+
### Response:\n"""
51+
52+
df_sample['prompt'] = df_sample["instruction"].apply(lambda x: template.format(x))
53+
df_sample.rename(columns={'description': 'response'}, inplace=True)
54+
df_sample['response'] = df_sample['response'] + "\n### End"
55+
df_sample = df_sample[['prompt', 'response']]
56+
# print df_sample column names
57+
print(f" Modified column names: {df_sample.columns}")
58+
print("------------------")
59+
print(df_sample.head(2))
60+
print("------------------")
61+
print(f"prompt:{df_sample['prompt'][0]}")
62+
print(f"response:{df_sample['response'][0]}")
63+
print("------------------")
64+
65+
# # test putting garbage in one of the original columns
66+
# # Create a copy of the original dataframe
67+
# df_copy = df.copy()
68+
# # Put garbage in the first row of the prompt column
69+
# # using a lambda function
70+
# print("before lambada ------------------")
71+
# print(df_copy.columns)
72+
# print("after lambada ------------------")
73+
# df_copy['prompt'] = df_copy['description'].apply(lambda x: len(x))
74+
# print(df_copy.columns)
75+
# print(df_copy.head(3))
76+
77+
78+
print(f"dataset-{uuid.uuid4()}")
79+

function-calling/openai_anyscale_parallel_function_calling_db.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def pretty_print_conversation(messages: List[dict]):
6767
"assistant": "blue",
6868
"function": "magenta",
6969
}
70-
70+
7171
for message in messages:
7272
if message["role"] == "system":
7373
print(colored(f"system: {message['content']}\n", role_to_color[message["role"]]))
@@ -83,8 +83,8 @@ def pretty_print_conversation(messages: List[dict]):
8383
if __name__ == "__main__":
8484
_ = load_dotenv(find_dotenv()) # read local .env file
8585
warnings.filterwarnings('ignore')
86-
openai.api_base = os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE"))
87-
openai.api_key = os.getenv("ANYSCALE_API_KEY", os.getenv("OPENAI_API_KEY"))
86+
# TODO: The 'openai.api_base' option isn't read in the client API. You will need to pass it when you instantiate the client, e.g. 'OpenAI(base_url=os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE")))'
87+
# openai.api_base = os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE"))
8888
MODEL = os.getenv("MODEL")
8989
print(f"Using MODEL={MODEL}; base={openai.api_base}")
9090

function-calling/openai_parallel_function_calling_external.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
def get_weather_data(params:Dict[Any, Any]=None,
1919
api_base:str="http://api.weatherstack.com/current") -> Dict[str, str]:
20-
20+
2121
"""
2222
Retrieves weather data from the OpenWeatherMap API.
2323
"""
@@ -51,7 +51,7 @@ def get_current_weather(location, units="f") -> Dict[str, str]:
5151
})
5252
print(weather_data)
5353
return weather_data
54-
54+
5555
def run_conversation(client: object, model: str) -> object:
5656
# Step 1: send the messages and available functions to the model
5757
messages = [{"role": "user", "content": "What's the weather like in three cities: San Francisco, Tokyo, and Paris?"}]
@@ -90,7 +90,7 @@ def run_conversation(client: object, model: str) -> object:
9090
print(f"tool_calls: {tool_calls}")
9191
# Step 2: check if the model wanted to call a function
9292
if tool_calls:
93-
93+
9494
# Step 3: call the function
9595
# Note: the JSON response may not always be valid; be sure to handle errors
9696
available_functions_table = {
@@ -125,12 +125,12 @@ def run_conversation(client: object, model: str) -> object:
125125
return second_response
126126
else:
127127
return response
128-
128+
129129
if __name__ == "__main__":
130130
_ = load_dotenv(find_dotenv()) # read local .env file
131131
warnings.filterwarnings('ignore')
132-
openai.api_base = os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE"))
133-
openai.api_key = os.getenv("ANYSCALE_API_KEY", os.getenv("OPENAI_API_KEY"))
132+
# TODO: The 'openai.api_base' option isn't read in the client API. You will need to pass it when you instantiate the client, e.g. 'OpenAI(base_url=os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE")))'
133+
# openai.api_base = os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE"))
134134
weather_api_key = os.getenv("WEATHER_API_KEY")
135135
MODEL = os.getenv("MODEL")
136136
print(f"Using MODEL={MODEL}; base={openai.api_base}")

function-calling/streamlit_func_calling_db_app.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def chat_completion_request(clnt:object, messages:object,
5858
print("Unable to generate ChatCompletion response")
5959
print(f"Exception: {e}")
6060
return e
61-
61+
6262
conn = connect_db("customers.db")
6363
database_schema_string = get_database_schema(conn)
6464

@@ -153,8 +153,8 @@ def get_answer(client: object,
153153

154154
_ = load_dotenv(find_dotenv()) # read local .env file
155155
warnings.filterwarnings('ignore')
156-
openai.api_base = os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE"))
157-
openai.api_key = os.getenv("ANYSCALE_API_KEY", os.getenv("OPENAI_API_KEY"))
156+
# TODO: The 'openai.api_base' option isn't read in the client API. You will need to pass it when you instantiate the client, e.g. 'OpenAI(base_url=os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE")))'
157+
# openai.api_base = os.getenv("ANYSCALE_API_BASE", os.getenv("OPENAI_API_BASE"))
158158
MODEL = os.getenv("MODEL")
159159
print(f"Using MODEL={MODEL}; base={openai.api_base}")
160160

llm-prompts/basic_ollama_llama3.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
"""Minimal example of streaming a llama3 completion from a local Ollama server."""
import ollama

# With stream=True, generate() returns an iterator of partial-response
# chunks instead of one completed response object.
response = ollama.generate(model="llama3", prompt="Why is the sky blue?", stream=True)
# Stream response: print each fragment as it arrives.
for chunk in response:
    data = chunk["response"]
    # BUG FIX: flush each fragment immediately; without flush=True the
    # pieces can sit in the stdout block buffer (e.g. when piped),
    # defeating the purpose of streaming.
    print(data, end="", flush=True)

0 commit comments

Comments
 (0)