From dc0da7257f729dbcc88524e9fa812a816fdce2dd Mon Sep 17 00:00:00 2001 From: Sitam Meur <103279526+sitamgithub-MSIT@users.noreply.github.com> Date: Mon, 11 Aug 2025 12:40:51 +0000 Subject: [PATCH 1/2] Add GPT-5 model --- code-model-comparison/model_service.py | 27 +++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/code-model-comparison/model_service.py b/code-model-comparison/model_service.py index 860b74a09..42492783b 100644 --- a/code-model-comparison/model_service.py +++ b/code-model-comparison/model_service.py @@ -6,10 +6,10 @@ # Available models AVAILABLE_MODELS = { - "Claude Sonnet 4": "openrouter/anthropic/claude-sonnet-4", + "Claude Opus 4.1": "openrouter/anthropic/claude-opus-4.1", "Qwen3-Coder": "openrouter/qwen/qwen3-coder", "Gemini 2.5 Flash": "openrouter/google/gemini-2.5-flash", - "GPT-4.1": "openrouter/openai/gpt-4.1", + "GPT-5": "gpt-5", } @@ -46,13 +46,22 @@ async def get_model_response_async( try: # Get streaming response from the model using LiteLLM asynchronously. - response = await acompletion( - model=model_mapping, - messages=messages, - api_key=os.getenv("OPENROUTER_API_KEY"), - max_tokens=2000, - stream=True, - ) + if "GPT" in model_name: + response = await acompletion( + model=model_mapping, + messages=messages, + api_key=os.getenv("OPENAI_API_KEY"), + max_completion_tokens=2000, + stream=True, + ) + else: + response = await acompletion( + model=model_mapping, + messages=messages, + api_key=os.getenv("OPENROUTER_API_KEY"), + max_tokens=2000, + stream=True, + ) if not response: yield "Error: No response received from model" From 166a064b855069dab6d97730fe3ac2a255c24c11 Mon Sep 17 00:00:00 2001 From: Sitam Meur <103279526+sitamgithub-MSIT@users.noreply.github.com> Date: Mon, 11 Aug 2025 12:48:02 +0000 Subject: [PATCH 2/2] update streamlit ui --- code-model-comparison/app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/code-model-comparison/app.py b/code-model-comparison/app.py index aa67a6ea3..ce8d19eb6 100644 --- a/code-model-comparison/app.py +++ b/code-model-comparison/app.py @@ -10,7 +10,7 @@ load_dotenv() # Set page config -st.set_page_config(page_title="Code Generation Model Comparison", layout="wide") +st.set_page_config(page_title="⚔️ CodeArena: Compare Codegen Models", layout="wide") # Custom CSS for responsive code containers st.markdown( @@ -60,7 +60,7 @@ st.session_state.evaluation_results = {"model1": None, "model2": None} # Main interface -st.title("Code Generation Model Comparison") +st.title("⚔️ CodeArena: Compare Codegen Models") powered_by_html = """
Powered by @@ -89,7 +89,7 @@ # If default models are not in available models, use first two available if default_model1 not in all_models: - default_model1 = all_models[0] if all_models else "Claude Sonnet 4" + default_model1 = all_models[0] if all_models else "Claude Opus 4.1" if default_model2 not in all_models: default_model2 = all_models[1] if len(all_models) > 1 else all_models[0]