# xpong/main.py at main · pncnmnp/xpong · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import ast
import asyncio
from datetime import datetime
import io
import json
import logging
import math
import os
import pickle
import random
import subprocess
import sys
import threading
import time

from dotenv import load_dotenv, find_dotenv
import eel
import numpy as np
from openai import OpenAI, AsyncOpenAI
from openai.helpers import LocalAudioPlayer
import pandas as pd
from scipy.stats import truncnorm
from sklearn.neighbors import KDTree

# Resolve a ".env" path anchored at the current working directory and load it
# into the process environment. GPTPrompts later reads OPENAI_API_KEY through
# os.getenv — presumably that key is supplied by this file; verify per deployment.
env_file = find_dotenv(os.path.join(os.getcwd(), ".env"))
load_dotenv(env_file)

# Root logging setup: INFO level, "timestamp - level - message" records.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
# Module-level logger used by the classes below.
logger = logging.getLogger(__name__)

# Number of points needed to win a game; also interpolated into the rules text
# of the commentary prompts and used to derive the match-stage thresholds.
GAME_POINT = 11

# Debugging mode to speed up the game.
SPEEDUP = False
# Speed multiplier: 1000 when SPEEDUP is on, otherwise 1.
# NOTE(review): the consumer of XFPS is not visible in this part of the file.
XFPS = 1000 if SPEEDUP else 1
# Commentary is switched off whenever the game runs in speed-up mode.
NO_COMMENTARY = SPEEDUP


class GPTClient:
    """Thin synchronous wrapper around the OpenAI chat-completions endpoint.

    Holds one ``OpenAI`` client plus a model name and exposes a single
    :meth:`chat` call returning the text of the first completion choice.
    """

    def __init__(self, api_key, model="gpt-4o-mini"):
        """Create the underlying client.

        Args:
            api_key: API key handed straight to ``OpenAI``.
            model: Model name used for every request made through this client.
        """
        self.model = model
        self.client = OpenAI(api_key=api_key)

    def chat(self, messages, temperature=0.7):
        """Send ``messages`` and return the first choice's message content.

        Args:
            messages: Chat messages forwarded unchanged to
                ``chat.completions.create``.
            temperature: Sampling temperature forwarded unchanged.

        Returns:
            ``response.choices[0].message.content`` of the completion.

        Raises:
            ValueError: If the request or the response handling fails for any
                reason. The original exception is attached as ``__cause__``.
        """
        try:
            response = self.client.chat.completions.create(
                model=self.model, messages=messages, temperature=temperature
            )
            return response.choices[0].message.content
        except Exception as e:
            # Callers catch ValueError; chain explicitly so the root cause
            # (network error, auth error, ...) stays visible in tracebacks.
            raise ValueError(f"Failed to generate chat response: {e}") from e


class GPTPrompts:
    def __init__(self, all_time_greats=None):
        """Set up the chat/speech clients and the static prompt material.

        Args:
            all_time_greats: Optional ``pandas.DataFrame`` of legendary
                players. When it is a DataFrame it is converted with
                :meth:`generate_all_time_greats`; any other value leaves
                ``all_time_greats_prompt`` as an empty string.
        """
        key = os.getenv("OPENAI_API_KEY")
        self.gpt_client = GPTClient(key)
        self.async_openai = AsyncOpenAI(api_key=key)

        # Either a list of legend dicts or "" (falsy) when no table was given.
        self.all_time_greats_prompt = (
            self.generate_all_time_greats(all_time_greats)
            if isinstance(all_time_greats, pd.DataFrame)
            else ""
        )

        # Voice direction sent as `instructions` with every speech request.
        voice_sections = (
            "Personality/affect: A high-energy sports commentator guiding users through administrative tasks.",
            "Voice: Dynamic, passionate, and engaging, with an exhilarating and motivating quality.",
            "Tone: Excited and enthusiastic, turning routine tasks into thrilling and high-stakes events.",
            "Dialect: Informal, fast-paced, and energetic, using sports terminology, vibrant metaphors, and enthusiastic phrasing.",
            "Pronunciation: Clear, powerful, and emphatic, emphasizing key actions and successes as if celebrating a game-winning moment.",
            "Features: Incorporates vivid sports analogies, enthusiastic exclamations, "
            "and rapid-fire commentary style to build excitement and maintain a lively pace throughout interactions.",
        )
        self.speech_instruction = "\n\n".join(voice_sections) + "\n"

    def generate_all_time_greats(self, df: pd.DataFrame):
        legends = []
        for _, row in df.head(5).iterrows():
            legends.append(
                {
                    "name": row["name"],
                    "nickname": row["nickname"],
                    "signature_style": row["style"],
                    "career_majors": row["career_majors"],
                    "peak_rating": row["pwr"],
                    "tagline": (
                        f"{row['nickname']} - a {row['style']} maestro with "
                        f"{row['career_majors']} career majors (peak PWR {row['pwr']})."
                    ),
                }
            )
        return legends

    def generate_player_info(self):
        messages = [
            {
                "role": "system",
                "content": (
                    "You are a precise and structured data generator. "
                    "You always return clean, raw JSON. Do not include any explanations, "
                    "markdown formatting, or code block symbols like ```."
                ),
            },
            {
                "role": "user",
                "content": (
                    "Generate exactly 64 rows of fictional person data. "
                    "Each row should be a JSON array (not an object) containing exactly 5 values: "
                    "[autoincremented_id, full_name, nick_name, country_code, date_of_birth]. "
                    "The date of birth should be a string in 'YYYY-MM-DD' format, "
                    "and ages should be between 18 and 40 as of the year 2025. "
                    "Ensure names are culturally consistent with their country. "
                    "Ensure the nicknames have a sporty and fun tone, e.g., 'The Ace', 'Big O', etc. "
                    "The country codes should always be 2-letter ISO country codes. "
                    "Not all countries need to be represented; it's okay if some repeat. "
                    "Return only a single raw JSON array of arrays. "
                    "Do not include any commentary, preamble, or markdown formatting like triple backticks."
                ),
            },
        ]
        chat_response = self.gpt_client.chat(messages)
        chat_response = ast.literal_eval(chat_response)
        player_info = {}
        for player in chat_response:
            player_id, full_name, nick_name, country, dob = player
            player_info[player_id] = {
                "full_name": full_name,
                "nick_name": nick_name,
                "country": country,
                "dob": dob,
            }
        return player_info

    def generate_opening_script(self, h2h):
        """Ask the chat model for the pre-match opening dialogue.

        Args:
            h2h: Head-to-head statistics dict. Keys read here: ``player_id``,
                ``opponent_id``, ``player_name``, ``opponent_name``,
                ``player_rank``, ``opponent_rank``, ``player_titles``,
                ``opponent_titles``, ``player_runner_up_titles``,
                ``opponent_runner_up_titles``, ``player_country``,
                ``opponent_country``, ``player_dob``, ``opponent_dob``,
                ``player_style``, ``opponent_style``, ``total_games``,
                ``win_rate`` (multiplied by 100 here), ``avg_points_scored``,
                ``avg_points_allowed`` and ``head_to_head``. The ``*_titles``
                values must be mappings of tournament name to count.

        Returns:
            The parsed model reply; the prompt requests an ordered list of
            ``{"speaker": ..., "text": ...}`` turns.

        Raises:
            ValueError: If the chat call fails or the reply cannot be parsed.
            SyntaxError: If the reply is neither JSON nor a Python literal.
        """

        def tournament_fmt(counts):
            # Only tournaments with a non-zero count are listed.
            listed = ", ".join(f"{n} '{t}' wins" for t, n in counts.items() if n)
            return listed or "No Tournament Wins Yet"

        player_tournament_wins = tournament_fmt(h2h["player_titles"])
        opponent_tournament_wins = tournament_fmt(h2h["opponent_titles"])
        player_tournament_runner_up = tournament_fmt(h2h["player_runner_up_titles"])
        opponent_tournament_runner_up = tournament_fmt(h2h["opponent_runner_up_titles"])

        # Format the percentages explicitly so binary float noise
        # (e.g. 56.99999999999999) never reaches the prompt.
        win_pct = h2h["win_rate"] * 100

        messages = [
            {
                "role": "system",
                "content": (
                    "You are an insightful and engaging sports commentator assistant for this Pong World Championship Final matchup. "
                    "You always return structured, clear, and realistic commentary in JSON format, "
                    "as an ordered list of turns. Each turn must be represented as an object with two keys: "
                    "'speaker' (alternating strictly between 'Tony McCrae' and 'Nina Novak') and 'text' (the commentary). "
                    "Allow each speaker to speak multiple consecutive sentences in a single turn before switching to the other. "
                    "Always list each player's major-tournament wins in the opening commentary (if any). "
                    "These are the official rules for Pong game:\n"
                    f"1. First player to {GAME_POINT} points wins immediately.\n"
                    "2. One point awarded per missed ball.\n"
                    "3. The player who loses the point serves next.\n"
                    "Never use markdown formatting or additional explanations in the JSON response."
                ),
            },
            {
                "role": "user",
                "content": (
                    "Here is the head-to-head statistics for today's Pong game:\n"
                    f"Player ID: {h2h['player_id']} vs Opponent ID: {h2h['opponent_id']}\n"
                    f"Player Names: {h2h['player_name']} vs {h2h['opponent_name']}\n"
                    f"Player Ranks: World Number {h2h['player_rank']} vs World Number {h2h['opponent_rank']}\n"
                    f"{h2h['player_name']}'s Tournament Wins: {player_tournament_wins}\n"
                    f"{h2h['opponent_name']}'s Tournament Wins: {opponent_tournament_wins}\n"
                    f"{h2h['player_name']}'s Tournament Runner-ups: {player_tournament_runner_up}\n"
                    f"{h2h['opponent_name']}'s Tournament Runner-ups: {opponent_tournament_runner_up}\n"
                    f"Countries: {h2h['player_country']} vs {h2h['opponent_country']}\n"
                    f"Dates of Birth: {h2h['player_dob']} vs {h2h['opponent_dob']}\n"
                    f"Playing Styles: {h2h['player_style']} vs {h2h['opponent_style']}\n"
                    f"Total Games faced against each other: {h2h['total_games']}\n"
                    f"Win Rate of Player {h2h['player_name']}: {win_pct:.1f}%\n"
                    f"Win Rate of Player {h2h['opponent_name']}: {100 - win_pct:.1f}%\n"
                    f"Average Points Scored by {h2h['player_name']} against {h2h['opponent_name']}: {h2h['avg_points_scored']}\n"
                    f"Average Points Allowed by {h2h['player_name']} against {h2h['opponent_name']}: {h2h['avg_points_allowed']}\n"
                    f"Recent Match Outcomes (from {h2h['player_name']}'s perspective - against {h2h['opponent_name']}): "
                    f"{h2h['head_to_head']}\n\n"
                    "Generate a commentary opening script structured exactly as an ordered list in JSON format, with each item containing:\n"
                    "- 'speaker': alternating strictly between 'Tony McCrae' and 'Nina Novak'\n"
                    "- 'text': multiple consecutive sentences of commentary under one speaker before switching.\n\n"
                    "Include explicitly:\n"
                    "- Extend a warm, lively welcome to our global audience, setting an enthusiastic tone right from the start.\n"
                    "- Briefly introduce the two commentators.\n"
                    "- Spotlight Madrid, Spain as our grand host, highlighting its global sporting significance and cultural vibrancy.\n"
                    "- Reveal and discuss both players' world rankings and how those standings elevate the competitive stakes.\n"
                    "- Provide an engaging, data-driven breakdown of their head-to-head match history and notable statistics.\n"
                    "- Capture the excitement of each player's entrance, describing the crowd's anticipation and overall atmosphere.\n"
                    "- Begin the match with an official opening line that includes the phrase: 'Paddles out and away we pong!'\n\n"
                    "The country code are in ISO 3166-1 alpha-2 format. When commenting, make sure to use the full name of the country.\n\n"
                    "No additional explanations or markdown should appear outside this JSON-formatted list."
                ),
            },
        ]
        raw_reply = self.gpt_client.chat(messages)

        # The reply is requested as JSON: parse it as JSON first (handles
        # null/true/false and JSON escapes), and only fall back to Python
        # literal syntax for replies that are not strict JSON.
        try:
            return json.loads(raw_reply)
        except (TypeError, ValueError):
            return ast.literal_eval(raw_reply)

    def generate_in_game_commentary(
        self, base_metrics, commentary_history, score_change=False, scored_by=None
    ):
        """Generate the next short live-commentary snippet via the chat model.

        The speaker alternates with whoever spoke last in
        ``commentary_history`` (random pick when the history is empty). The
        prompt is assembled from the current match stage, optional cues
        (historical flashback, all-time-great comparison, sideline gossip —
        the latter two fire with 10% probability each) and the metrics.

        Args:
            base_metrics: Dict of live match metrics. It must contain every
                key interpolated below (scores, player names/ranks/countries/
                styles, shot and serve speeds, rally and bounce counts, shot
                angles, paddle movement, max streaks), plus ``player_titles``
                and ``opponent_titles`` (mappings of tournament name to count)
                and ``most_similar_game`` (a ``pandas.DataFrame`` to trigger
                the flashback cue; any other value disables it).
            commentary_history: List of earlier snippets (dicts with a
                ``speaker`` key); only the last ten are sent to the model.
            score_change: Whether a point was just scored.
            scored_by: Name of the scorer; the new score is announced only
                when ``score_change`` is true and this is not ``None``.

        Returns:
            The parsed model reply; the prompt requests a JSON object with the
            keys ``speaker`` and ``text``.

        Raises:
            ValueError: If the chat call fails or the reply cannot be parsed.
            SyntaxError: If the reply is neither JSON nor a Python literal.
        """
        # TODO: Add commentary on aces
        # TODO: Mention that the rally is ongoing
        # TODO: Find out similar historical matches of the same players, and refer to them
        # TODO: Summary on how these two got so far in the tournament
        if commentary_history:
            last_speaker = commentary_history[-1].get("speaker", "")
            speaker = "Tony McCrae" if last_speaker == "Nina Novak" else "Nina Novak"
        else:
            speaker = random.choice(["Tony McCrae", "Nina Novak"])

        # How far back do we want to show the history?
        COMM_HISTORY_LENGTH = 10
        recent_entries = list(reversed(commentary_history[-COMM_HISTORY_LENGTH:]))
        recent_history = (
            "\n".join(f"{i}. {entry}\n" for i, entry in enumerate(recent_entries))
            if recent_entries
            else "No previous commentary available.\n"
        )

        # Match stage, checked from the most decisive situation downwards.
        left_score = base_metrics["left_score"]
        right_score = base_metrics["right_score"]
        leader_score = max(left_score, right_score)
        if left_score == right_score == GAME_POINT - 1:
            match_stage = "Match Stage: Sudden-death — championship point either way; every heartbeat echoes in the hall."
        elif left_score >= GAME_POINT - 1:
            match_stage = f"Match Stage: Championship point for {base_metrics['left_player_name']} — one clean strike could end it."
        elif right_score >= GAME_POINT - 1:
            match_stage = f"Match Stage: Championship point for {base_metrics['right_player_name']} — a single winner seals the crown."
        elif leader_score >= (GAME_POINT // 2 + GAME_POINT // 4):
            match_stage = "Match Stage: Final quarter — the finish line is in sight, tension thick, each rally feels worth two."
        elif leader_score >= (GAME_POINT // 2):
            match_stage = (
                "Match Stage: Midway battle — momentum teeters, nerves tighten."
            )
        elif leader_score >= (GAME_POINT // 4):
            match_stage = (
                "Match Stage: First quarter — early sparring for scoreboard control."
            )
        else:
            match_stage = (
                "Match Stage: Opening exchanges — players probing for weaknesses."
            )
        match_stage += (
            "\nWhen the action shifts into a new phase, "
            "explicitly weave that stage name (e.g., “mid-way stage”, “final quarter”) into your next line of commentary. "
            "Use the commentary history to determine if the new phase has started."
            " You do not need to mention the new phase, if the action is still in the same phase.\n"
        )

        # Determine if hype is required
        long_rally = base_metrics["recent_rally"] and base_metrics["recent_rally"] >= 6
        long_streak = (
            max(base_metrics["left_max_streak"], base_metrics["right_max_streak"]) >= 3
        )

        should_hype = score_change or long_rally or long_streak

        hype_prompt = (
            "Increase excitement! Highlight this moment energetically."
            if should_hype
            else "Keep a conversational tone—no excessive excitement."
        )

        score_change_prompt = (
            f"The score just changed to {base_metrics['left_score']} to {base_metrics['right_score']} "
            f" and the point was scored by {scored_by}."
            " Mention the new score explicitly.\n"
            if score_change and scored_by is not None
            else "Don't explicitly mention the score this time. The rally is still ongoing.\n"
        )

        extra_metrics_prompt = (
            "When weaving extra game statistics into your commentary, interpret them like this:\n"
            "- **Ball Bounces:** A high bounce count signals an electrifying, unpredictable rally with the ball ricocheting wildly off the walls, forcing players into quick, unpredictable reactions; a low count points to a controlled, tactical duel where each shot is carefully placed.\n"
            "- **Shot Angles:** Derive each shot's angle from the (vx, vy) vector:\n"
            "    • Steep angles (>45°) become daring corner lobs or sharp cross-courts.\n"
            "    • Moderate angles (15°-45°) look like graceful arcs that test court coverage.\n"
            "    • Shallow angles (<15°) play out as direct, flat drives down the line.\n"
            "  Vary your wording—mix in sports metaphors, court imagery, or player-centric comparisons rather than citing raw numbers.\n"
            "- **Shot Speed:** Classify by mph without prescribing exact phrases:\n"
            "    • >90 mph = a true blistering pace.\n"
            "    • 75-90 mph = a solid mid-range drive.\n"
            "    • <75 mph = a crafty slow-ball or change-up.\n"
            "  But don't reuse the same adjective—rotate through metaphors, analogies, or fresh descriptors to keep the narrative vibrant.\n"
            "- **Paddle Movement:** Measure average Y-axis shifts:\n"
            "    • Movements near the top/bottom edges show aggressive table coverage.\n"
            "    • Movements clustered near center reflect a steady, textbook defensive stance.\n"
            "  Illustrate these actions with varied imagery—mention agility, court geography, or player posture to enrich the play-by-play.\n\n"
            "Blend these interpretations naturally into the flow, using diverse language choices so each mention feels original and engaging. "
            "**Important:** Never mention exact numeric values for game metrics like ball bounces, rally counts, shot angles, speeds, or paddle movements. Always provide rounded, approximate aggregates, such as '30+ bounces', '20 plus shots', or 'speeds over 90 mph', to maintain natural commentary flow."
        )

        non_repetition_prompt = (
            "\n**Narrative variation rule:** Before writing, scan the recent commentary "
            "history below (most-recent first). Do **not** repeat a phrase, opener, "
            "or adjective that already appears there (e.g. avoid re-using 'Absolutely', "
            "'What a rally', 'atmosphere is electric', 'tension is palpable', etc.). "
            "Vary synonyms, sentence structure, and limit exclamation marks to **one** "
            "per snippet to keep the commentary fresh.\n"
        )

        # 30% of snippets are flagged as "color" (meta) commentary.
        color_flag = random.choices([0, 1], [0.7, 0.3])[0]

        legend_cue = ""
        legends_pool = self.all_time_greats_prompt
        if legends_pool and random.random() < 0.1:
            logger.info("\033[93mGOAT cue triggered\033[0m")
            chosen = random.choice(legends_pool)
            legend_cue = (
                f"\nIn this commentary, make sure to weave in a flash comparison to a Pong all-time great "
                f"{chosen['nickname']} ({chosen['name']}) - {chosen['tagline']} "
                "Keep it to a single, punchy sentence.\n"
                "Remember to label him as a legendary and all-time great player, "
                "his career highlights include:\n"
                f"{chosen}\n"
            )

        def tournament_fmt(counts):
            # Only tournaments with a non-zero count are listed.
            listed = ", ".join(
                f"'{t}' tournament {n} times" for t, n in counts.items() if n
            )
            return listed or "no tournament wins yet"

        left_tournament_wins = tournament_fmt(base_metrics["player_titles"])
        right_tournament_wins = tournament_fmt(base_metrics["opponent_titles"])

        # TODO: Make this a one time only thing?
        # Or have a diversity in the similar games,
        # by deleting the ones already used
        similar_game_prompt = ""
        if isinstance(base_metrics["most_similar_game"], pd.DataFrame):
            logger.info("\033[93mHistorical game cue triggered\033[0m")
            similar = base_metrics["most_similar_game"].iloc[0]
            final_score = (
                similar["match_progress"][-1] if similar["match_progress"] else ""
            )
            similar_game_prompt = (
                "\nPRIORITY FLASHBACK: Pause routine commentary and deliver a "
                "concise, high-energy comparison to the classic "
                f"{similar['date']} {similar['tournament_name']} showdown "
                f"(match ID {similar['match_id']}), where players "
                f"{similar['player_id']} and {similar['opponent_id']} battled from the very "
                f"same opening pattern to a {final_score} finish. "
                "Explain—within <= 200 chars - why today's rally feels like deja vu, "
                "then pivot straight back to live action.\n"
            )

        # Gossip is only considered when no other special cue is active.
        gossip_cue = ""
        if not (legend_cue or similar_game_prompt) and random.random() < 0.1:
            logger.info("\033[93mGossip cue triggered\033[0m")
            gossip_cue = (
                "\nSIDELINE BUZZ (MANDATORY): Craft a <=180-char rumour that fuses what's "
                "happening right now (fatigue signs, gear tweaks, mini-streaks, etc.) with a "
                "plausible locker-room whisper overheard pre-match. **You MUST reference this "
                "rumour explicitly in the next commentary snippet** before pivoting back to live "
                "play.\n"
            )

        messages = [
            {
                "role": "system",
                "content": (
                    "You are a sports commentator assistant for an ongoing Pong World Championship Final game. "
                    f"Generate the next short commentary snippet spoken by {speaker}. "
                    "Provide a natural conversational flow by briefly acknowledging or reacting to what your co-commentator previously said. "
                    "Base your commentary on the provided metrics and recent commentary history, and avoid repeating similar observations consecutively. "
                    f"{hype_prompt} {score_change_prompt} {extra_metrics_prompt} {non_repetition_prompt}"
                    f"{similar_game_prompt}{legend_cue}{gossip_cue} "
                    f"The game is currently in the {match_stage}.\n"
                    "If the color commentary flag is set to 1, provide creative meta-commentary about the game's strategy, player styles, or atmosphere, without relying heavily on numerical metrics. "
                    "Always keep the text under 200 characters, refer to players by first names only, and avoid excessive focus on shot and serve speeds unless highly significant. "
                    "**After a score change interruption**, smoothly pick up and continue the commentary based directly on the most recent statements from your co-commentator. Explicitly acknowledge or react briefly to what was previously mentioned, maintaining a cohesive, uninterrupted conversational flow. "
                    "These are the official rules for Pong game:\n"
                    f"1. First player to {GAME_POINT} points wins immediately.\n"
                    "2. One point awarded per missed ball.\n"
                    "3. The player who loses the point serves next.\n"
                    "Never include markdown or explanations outside the JSON response. "
                    "Return the commentary as a JSON object with keys 'speaker' and 'text'."
                ),
            },
            {
                "role": "user",
                "content": (
                    "This is a World Championship Final game.\n"
                    f"{base_metrics['left_player_name']} has previously won {left_tournament_wins}.\n"
                    f"{base_metrics['right_player_name']} has previously won {right_tournament_wins}\n"
                    "Base Metrics:\n\n"
                    f"{base_metrics['left_player_name']}'s score is: {base_metrics['left_score']}\n"
                    f"{base_metrics['right_player_name']}'s score is: {base_metrics['right_score']}\n\n"
                    f"{base_metrics['left_player_name']} is ranked world number {base_metrics['left_player_world_rank']}\n"
                    f"{base_metrics['right_player_name']} is ranked world number {base_metrics['right_player_world_rank']}\n\n"
                    f"{base_metrics['left_player_name']} is from {base_metrics['left_player_country']} and has a playing style of {base_metrics['left_player_style']}\n"
                    f"{base_metrics['right_player_name']} is from {base_metrics['right_player_country']} and has a playing style of {base_metrics['right_player_style']}\n\n"
                    f"{base_metrics['left_player_name']}'s recent shot speed: {base_metrics['recent_left_shot_speed']}\n"
                    f"{base_metrics['right_player_name']}'s recent shot speed: {base_metrics['recent_right_shot_speed']}\n\n"
                    f"{base_metrics['left_player_name']}'s average shot speed: {base_metrics['left_shot_speed_avg']}\n"
                    f"{base_metrics['right_player_name']}'s average shot speed: {base_metrics['right_shot_speed_avg']}\n\n"
                    f"{base_metrics['left_player_name']}'s recent serve speed: {base_metrics['recent_left_serve_speed']}\n"
                    f"{base_metrics['right_player_name']}'s recent serve speed: {base_metrics['recent_right_serve_speed']}\n\n"
                    f"{base_metrics['left_player_name']}'s average serve speed: {base_metrics['left_serve_speed_avg']}\n"
                    f"{base_metrics['right_player_name']}'s average serve speed: {base_metrics['right_serve_speed_avg']}\n\n"
                    f"The current rally length is: {base_metrics['recent_rally']}\n"
                    f"The current ball bounce count is: {base_metrics['ball_bounce_count']}\n"
                    f"The average rally length is: {base_metrics['average_rally']}\n\n"
                    f"{base_metrics['left_player_name']}'s average shot angle: {base_metrics['avg_left_shot_angle']}\n"
                    f"{base_metrics['right_player_name']}'s average shot angle: {base_metrics['avg_right_shot_angle']}\n\n"
                    f"{base_metrics['left_player_name']}'s average paddle position: {base_metrics['avg_left_paddle_movement']}\n"
                    f"{base_metrics['right_player_name']}'s average paddle position: {base_metrics['avg_right_paddle_movement']}\n\n"
                    f"{base_metrics['left_player_name']}'s longest winning streak: {base_metrics['left_max_streak']}\n"
                    f"{base_metrics['right_player_name']}'s longest winning streak: {base_metrics['right_max_streak']}\n\n"
                    "The Recent Commentary History provided below is ordered with the **most recent commentary first**.\n"
                    f"Recent Commentary History (most‑recent first):\n{recent_history}\n\n"
                    f"Color Commentary Flag: {color_flag}\n\n"
                    "Generate your next brief commentary now."
                ),
            },
        ]

        logger.info("Generating in-game commentary for messages..... %s", messages)
        chat_response = self.gpt_client.chat(messages)
        logger.info(
            "\033[91mChat response for in-game commentary is..... %s\033[0m",
            chat_response,
        )

        # The reply is requested as JSON: parse it as JSON first (handles
        # null/true/false and JSON escapes), and only fall back to Python
        # literal syntax for replies that are not strict JSON.
        try:
            return json.loads(chat_response)
        except (TypeError, ValueError):
            return ast.literal_eval(chat_response)

    def generate_final_commentary(self, winner_name, loser_name, final_stats):
        """Ask the chat model for the post-match sign-off dialogue.

        Args:
            winner_name: Name of the match winner.
            loser_name: Name of the runner-up.
            final_stats: Dict of final metrics. Required keys: ``left_score``,
                ``right_score``, ``average_rally``, ``left_shot_speed_avg``
                and ``right_shot_speed_avg``; ``left_max_streak`` and
                ``right_max_streak`` are optional and default to 0.

        Returns:
            The parsed model reply; the prompt requests a JSON array of
            ``{"speaker": ..., "text": ...}`` objects.

        Raises:
            ValueError: If the chat call fails or the reply cannot be parsed.
            SyntaxError: If the reply is neither JSON nor a Python literal.
        """
        # A missing (None) rally average would break the ":.0f" format spec.
        rally = final_stats["average_rally"] or 0

        # Both speed averages can be empty/None when no shot was recorded;
        # max() over an empty sequence would raise, so fall back to a label.
        # NOTE(review): the prompt labels these "Longest rally" / "Fastest
        # shot-speed" although the values are averages — confirm intent.
        speeds = [
            speed
            for speed in (
                final_stats["left_shot_speed_avg"],
                final_stats["right_shot_speed_avg"],
            )
            if speed
        ]
        fastest = max(speeds) if speeds else "unknown"

        messages = [
            {
                "role": "system",
                "content": (
                    "You are a sports-commentary assistant.  Respond ONLY with a raw JSON "
                    "array of objects, each with keys 'speaker' and 'text', alternating "
                    "between 'Tony McCrae' and 'Nina Novak'.  No markdown, no code fences."
                ),
            },
            {
                "role": "user",
                "content": (
                    f"The match is over.  Winner: {winner_name}.  Runner-up: {loser_name}.\n"
                    f"Final score {final_stats['left_score']}-{final_stats['right_score']}.\n"
                    f"Longest rally: {rally:.0f}+ shots.\n"
                    f"Fastest shot-speed today ~ {fastest} mph.\n"
                    f"Maximum consecutive point won today: L = {final_stats.get('left_max_streak',0)}, R = {final_stats.get('right_max_streak',0)}.\n\n"
                    "* Must open with the line: "
                    f"{winner_name} - Du bist Weltmeister!! You are the world champion!!\n"
                    "* Work in a 2-sentence journey recap (tournament run, key semis).\n"
                    "* Mention two quick stats from the list above - no raw numbers beyond those.\n"
                    "* Close with a joint sign-off, last line by Nina: 'We'll see you next season, adios!'."
                ),
            },
        ]
        logger.info("Generating final commentary for messages..... %s", messages)
        raw_reply = self.gpt_client.chat(messages)

        # The reply is requested as JSON: parse it as JSON first (handles
        # null/true/false and JSON escapes), and only fall back to Python
        # literal syntax for replies that are not strict JSON.
        try:
            return json.loads(raw_reply)
        except (TypeError, ValueError):
            return ast.literal_eval(raw_reply)

    async def speak(self, input, voice):
        """Synthesize ``input`` with the given voice and play it immediately.

        The speech request is streamed (PCM, speed 1.4, using
        ``self.speech_instruction``) and the open response is handed to a
        ``LocalAudioPlayer``.

        Args:
            input: Text to speak.
            voice: Voice name forwarded to the speech endpoint.
        """
        request = {
            "model": "gpt-4o-mini-tts",
            "voice": voice,
            "input": input,
            "speed": 1.4,
            "instructions": self.speech_instruction,
            "response_format": "pcm",
        }
        streaming = self.async_openai.audio.speech.with_streaming_response
        async with streaming.create(**request) as response:
            player = LocalAudioPlayer()
            await player.play(response)

    async def fetch_audio(self, text: str, voice: str) -> bytes:
        chunks = []
        async with self.async_openai.audio.speech.with_streaming_response.create(
            model="gpt-4o-mini-tts",
            voice=voice,
            input=text,
            speed=2,
            instructions=self.speech_instruction,
            response_format="pcm",
        ) as response:
            async for chunk in response.iter_bytes(chunk_size=1024):
                if chunk:
                    chunks.append(chunk)
        return b"".join(chunks)

    async def speak_in_game_commentary(self, commentary_script):
        voice = "ballad" if commentary_script["speaker"] == "Tony McCrae" else "coral"
        await self.speak(commentary_script["text"], voice)

    async def speak_opening_script(self, head_to_head_stats):
        """Generate the opening script, synthesize every line, then play them.

        All lines are synthesized concurrently with :meth:`fetch_audio`; the
        resulting buffers are played one after another in script order.

        Args:
            head_to_head_stats: Statistics dict forwarded to
                :meth:`generate_opening_script`.
        """
        opening_script = self.generate_opening_script(head_to_head_stats)
        # "\033[91m" starts the colored span; the sequence needs its final
        # "m" to be a color code, otherwise the terminal reads a different
        # control sequence.
        logger.info(
            f"\033[91mThe generated opening script is..... {opening_script}\033[0m"
        )

        # synthesize all at once, and collect coroutines
        fetch_tasks = []
        for line in opening_script:
            voice = "ballad" if line["speaker"] == "Tony McCrae" else "coral"
            fetch_tasks.append(self.fetch_audio(line["text"], voice))

        # waiting for all pcm buffers (gather keeps the script order)
        audio_buffers = await asyncio.gather(*fetch_tasks)

        for buf in audio_buffers:
            resp = InMemoryStreamResponse(buf)
            await LocalAudioPlayer().play(resp)

    async def speak_final_commentary(self, winner_name, loser_name, final_stats):
        """Generate the closing script, synthesize every line, then play them.

        Args:
            winner_name: Name of the match winner.
            loser_name: Name of the runner-up.
            final_stats: Final metrics forwarded to
                :meth:`generate_final_commentary`.
        """
        final_script = self.generate_final_commentary(
            winner_name, loser_name, final_stats
        )
        logger.info(f"\033[91mThe generated final script is..... {final_script}\033[0m")

        def synthesize(turn):
            # Tony gets the "ballad" voice, everyone else "coral".
            voice = "ballad" if turn["speaker"] == "Tony McCrae" else "coral"
            return self.fetch_audio(turn["text"], voice)

        # Synthesize all turns concurrently; gather preserves script order.
        audio_buffers = await asyncio.gather(*[synthesize(turn) for turn in final_script])

        # Playback is sequential, one buffer after another.
        for pcm in audio_buffers:
            await LocalAudioPlayer().play(InMemoryStreamResponse(pcm))


class InMemoryStreamResponse:
    """Expose an in-memory byte string through an async ``iter_bytes`` method."""

    def __init__(self, pcm_bytes: bytes):
        # Hold the payload in an in-memory bytes buffer that is read in chunks.
        self.stream = io.BytesIO(pcm_bytes)

    async def iter_bytes(self, chunk_size: int = 1024):
        """Yield successive reads of ``chunk_size`` bytes until a read is empty.

        Reading advances the underlying buffer, so a second iteration over
        the same instance yields nothing.
        """
        # iter(callable, sentinel) stops as soon as a read returns b"".
        for piece in iter(lambda: self.stream.read(chunk_size), b""):
            yield piece


class CommentaryManager:
    """Speak only the newest pending commentary line, one at a time.

    ``enqueue`` overwrites any line that has not been picked up yet, so a
    backlog never builds up. ``flush`` cancels the running speech task and can
    play a pre-recorded filler clip from ``./assets/fillers`` once that task
    has finished.
    """

    def __init__(self, gpt_prompts: "GPTPrompts"):
        """Store the speech backend and prepare the shuffled filler playlist.

        Raises whatever ``os.listdir`` raises if ``./assets/fillers`` cannot
        be listed.
        """
        self.gpt_prompts = gpt_prompts
        self.latest_commentary = None
        self.processing_task = None
        # Filler clips are shuffled once with a dedicated RNG (seeded with an
        # integer drawn from 0..100) and then consumed round-robin.
        self.filler_index = 0
        self.random_inst = random.Random(random.randint(0, 100))
        self.filler_files = sorted(
            [file for file in os.listdir("./assets/fillers") if file.endswith(".mp3")]
        )
        self.random_inst.shuffle(self.filler_files)
        # We only log history when speech is synthesized for the line.
        self.commentary_history = []

    def speak_filler(self):
        """Play the next filler clip with ``mpg123``; no-op without clips.

        To get the maximum diversity of filler files we do not pick at random;
        the shuffled list is walked round-robin instead. The call blocks until
        the player process exits.
        """
        if not self.filler_files:
            return

        import subprocess

        filler_file = self.filler_files[self.filler_index]
        self.filler_index = (self.filler_index + 1) % len(self.filler_files)
        file_path = os.path.join("./assets/fillers", filler_file)
        try:
            # Argument-list form: the path is passed verbatim, so file names
            # with spaces or shell metacharacters cannot break the command.
            subprocess.run(["mpg123", file_path], check=False)
        except OSError as exc:
            # Fillers are best-effort: a missing player must not raise here.
            logger.warning(f"Could not play filler {file_path}: {exc}")

    def flush(self, no_filler=False):
        """Cancel the in-flight speech task and drop the pending line.

        Does nothing when no task is running. Unless ``no_filler`` is true, a
        filler clip is played from a done-callback, i.e. only after the
        cancelled task has actually finished.
        """
        if self.processing_task is not None and not self.processing_task.done():
            self.processing_task.cancel()
            self.latest_commentary = None
            if not no_filler:
                self.processing_task.add_done_callback(lambda _: self.speak_filler())

    def enqueue(self, commentary_script):
        """Make ``commentary_script`` the pending line and ensure a worker runs.

        A previously pending line that has not been picked up is replaced.
        Must be called with a running event loop (``asyncio.create_task``).
        """
        self.latest_commentary = commentary_script
        if self.processing_task is None or self.processing_task.done():
            self.processing_task = asyncio.create_task(self.process_latest())

    async def process_latest(self):
        """Speak pending lines until none is left.

        Each line is appended to ``commentary_history`` and cleared from the
        pending slot before it is spoken, so a line enqueued while speaking is
        picked up by the next loop iteration.
        """
        while self.latest_commentary is not None:
            commentary = self.latest_commentary
            self.commentary_history.append(commentary)
            self.latest_commentary = None
            await self.gpt_prompts.speak_in_game_commentary(commentary)


class GameStats:
    """Simulated history of matches, tournaments, Elo ratings and player profiles.

    Tournaments are simulated as single-elimination brackets. Every match
    updates the players' Elo ratings and appends rows to ``game_stats``; the
    result of each final is appended to ``tournament_stats``. Per-player
    aggregates are materialised into ``player_stats`` by ``player_statistics``.
    """

    def __init__(self, num_top_players):
        """Create empty result tables for a field of ``num_top_players`` players."""
        # Historical game statistics are stored in a DataFrame that holds, per
        # player and match: the player id, opponent id, match id, date,
        # tournament, result and game statistics. The game statistics include
        # the points scored by the player, the points allowed by the player,
        # the fastest ball speed, and the number of aces.
        self.num_top_players = num_top_players
        # Each entry is (name, month, day, number of entrants) - the same
        # layout that simulate_tournament unpacks from ``tournament_details``.
        self.major_tournaments = [
            ("Sovereign Cup", 2, 1, num_top_players // 2),
            ("Grand Invitational", 5, 1, num_top_players // 4),
            ("World Championship", 7, 1, num_top_players),
            ("Dominion Open", 9, 1, num_top_players),
        ]
        self.game_stats = pd.DataFrame(
            columns=[
                "player_id",
                "opponent_id",
                "match_id",
                "date",
                "tournament_name",
                "tournament_id",
                "result",
                "match_progress",
                "num_players_round",
                "points_scored",
                "points_allowed",
                "fastest_ball_speed",
                "aces",
            ]
        )
        self.tournament_stats = pd.DataFrame(
            columns=[
                "winner_id",
                "runner_up_id",
                "tournament_id",
                "tournament_name",
                "tournament_year",
                "match_id",
            ]
        )
        # Maps player id -> current Elo rating.
        self.player_elo = {}
        self.player_stats = pd.DataFrame(
            columns=[
                "player_id",
                "name",
                "nickname",
                "country",
                "dob",
                "style",
                "player_elo",
                "total_games",
                "win_rate",
                "avg_points_scored",
                "avg_points_allowed",
                "avg_points_diff",
                "longest_winning_streak",
                "longest_losing_streak",
                "fastest_ball_speed",
                "avg_aces",
                "tournaments_won",
                "tournaments_runner_up",
            ]
        )
        # Filled in by get_all_time_greats().
        self.all_time_greats = None

    def get_all_time_greats(self):
        """Rank players by majors won, then power rating, into ``all_time_greats``.

        Adds the ``pwr`` and ``career_majors`` columns to ``player_stats`` and
        keeps the top ``num_top_players // 8`` rows.
        """
        # PWR = win_rate x log10(1 + total_games)
        # PWR stands for Power Rating
        self.player_stats["pwr"] = self.player_stats["win_rate"] * (
            self.player_stats["total_games"].apply(lambda x: math.log10(1 + x))
        )

        # Total number of major tournaments won; each cell of "tournaments_won"
        # is a dict mapping tournament name -> number of titles.
        self.player_stats["career_majors"] = self.player_stats["tournaments_won"].apply(
            lambda x: sum(x.values())
        )

        # GOATs are the players with the most major tournaments won
        # and in case of a tie, the player with the highest PWR
        self.all_time_greats = self.player_stats.sort_values(
            by=["career_majors", "pwr"], ascending=[False, False]
        ).head(self.num_top_players // 8)

    def tournament_pairing(self, player_ids, num_players):
        """Pair the ``num_players`` best-rated players for a first round.

        The selected players are split into a higher-rated half and a
        lower-rated half; the lower half is shuffled and zipped against the
        upper half, so every first-round match is elite vs. challenger.

        Returns a list of ``(elite_id, challenger_id)`` tuples.

        Raises:
            ValueError: if ``player_ids`` has an odd length.
        """
        if len(player_ids) % 2 != 0:
            raise ValueError("Number of players in a tournament must be even.")
        players_by_rankings = sorted(
            player_ids, key=lambda x: self.player_elo[x], reverse=True
        )[:num_players]
        half_len = len(players_by_rankings) // 2
        elite_tier = players_by_rankings[:half_len]
        challenger_tier = players_by_rankings[half_len:]
        random.shuffle(challenger_tier)
        pairings = list(zip(elite_tier, challenger_tier))
        return pairings

    def simulate_aces(self, num_points):
        """Draw a random ace count for a player who scored ``num_points``.

        With probability 0.9 the count is uniform on ``0..num_points // 3``;
        otherwise it is ``floor(u * num_points)`` for a uniform ``u`` in [0, 1).
        """
        threshold = num_points // 3
        if random.random() < 0.9:
            return random.randint(0, threshold)
        else:
            return math.floor(random.random() * num_points)

    def simulate_fastest_ball_speed(self):
        """Sample a ball speed from a normal distribution truncated to [130, 160].

        The distribution is centred on 145 with a standard deviation of 5 and
        the sample is rounded to one decimal place.
        """
        lower, upper = 130, 160
        mu = (lower + upper) / 2
        sigma = (upper - lower) / 6
        # truncnorm expects its bounds in units of standard deviations from loc.
        a, b = (lower - mu) / sigma, (upper - mu) / sigma
        ball_speed = truncnorm.rvs(a, b, loc=mu, scale=sigma)
        return round(ball_speed, 1)

    def games_in_single_day(self, num_round_players, num_total_players):
        """Return the matches-per-day quota for a round: 8 scaled by the share of
        players still in the round, truncated to an int and never below 1."""
        return int(max(8 * (num_round_players / num_total_players), 1))

    def simulate_game(self, winner_points, loser_points):
        """Produce a random point-by-point score progression for one match.

        Returns a list of ``"W:L"`` running-score strings, one per point, that
        ends at ``f"{winner_points}:{loser_points}"``.
        """
        # There are probably many ways to do this
        # This is the simplest I could think of
        points = ["W"] * winner_points + ["L"] * loser_points
        random.shuffle(points)
        point_by_point = []
        current_score = {"W": 0, "L": 0}
        for point in points:
            current_score[point] += 1
            point_by_point.append(f"{current_score['W']}:{current_score['L']}")
        return point_by_point

    def get_loser_points(self, prob):
        """Draw the loser's points from a win probability of the match.

        The mean is ``10 * (1 - |2 * prob - 1|)``: 10 for an even match and 0
        for a certain one. Because of the absolute value, passing either
        player's win probability gives the same mean. The Gaussian draw
        (sd 1) is truncated by ``int`` and clamped to 0..10.
        """
        return max(0, min(10, int(random.gauss(10 * (1 - abs(2 * prob - 1)), 1))))

    def simulate_tournament(
        self, player_ids, tournament_details, tournament_id, tournament_year
    ):
        """Simulate one single-elimination tournament and record its results.

        Args:
            player_ids: candidate players; the best-rated ones are entered.
            tournament_details: ``(name, month, day, num_players)`` tuple.
            tournament_id: identifier stored with every recorded row.
            tournament_year: calendar year of the start date.

        Every match appends one row per participant to ``game_stats`` and
        updates both Elo ratings. After the final (the round played by exactly
        two players) its winner and loser are appended to ``tournament_stats``.
        """
        (tournament_name, month, day, num_players) = tournament_details
        # Flatten the pairings so neighbours in the list play each other.
        tournament_pairings = [
            player
            for pair in self.tournament_pairing(player_ids, num_players)
            for player in pair
        ]
        round_players = tournament_pairings[:]
        match_id_counter = 1

        # Tournament start date
        match_date = datetime(tournament_year, month, day)

        while len(round_players) > 1:
            next_round_players = []
            # Every round starts on a new day.
            match_date += pd.DateOffset(days=1)
            curr_round_num_players = len(round_players)
            games_in_single_day = self.games_in_single_day(
                curr_round_num_players, num_players
            )

            for i in range(0, curr_round_num_players, 2):
                player1, player2 = round_players[i], round_players[i + 1]
                p1_rank, p2_rank = self.player_elo[player1], self.player_elo[player2]
                rank_diff = p2_rank - p1_rank

                # Move on to the next day once the daily quota is reached.
                if match_id_counter % games_in_single_day == 0:
                    match_date += pd.DateOffset(days=1)

                # Standard Elo expected score on a 400-point logistic scale.
                p1_win_probability = 1 / (1 + 10 ** (rank_diff / 400))
                p2_win_probability = 1 - p1_win_probability

                if random.random() < p1_win_probability:
                    winner, loser = player1, player2
                    winner_rank, loser_rank = p1_rank, p2_rank
                    winner_prob, loser_prob = p1_win_probability, p2_win_probability
                else:
                    winner, loser = player2, player1
                    winner_rank, loser_rank = p2_rank, p1_rank
                    winner_prob, loser_prob = p2_win_probability, p1_win_probability

                # The winner's K-factor shrinks as the rating rises above 2000
                # (floor of 1); the loser's K-factor is never below 32.
                k_winner = max(1, 32 - 0.04 * (winner_rank - 2000))
                k_loser = max(32, 32 - 0.04 * (loser_rank - 2000))
                self.player_elo[winner] += k_winner * (1 - winner_prob)
                self.player_elo[loser] += k_loser * (0 - loser_prob)
                loser_points = self.get_loser_points(loser_prob)

                winner_points = GAME_POINT
                match_progress = self.simulate_game(winner_points, loser_points)

                self.game_stats.loc[len(self.game_stats)] = {
                    "player_id": winner,
                    "opponent_id": loser,
                    "match_id": match_id_counter,
                    "date": match_date,
                    "tournament_name": tournament_name,
                    "tournament_id": tournament_id,
                    "result": "W",
                    "match_progress": match_progress,
                    "num_players_round": curr_round_num_players,
                    "points_scored": winner_points,
                    "points_allowed": loser_points,
                    "fastest_ball_speed": self.simulate_fastest_ball_speed(),
                    "aces": self.simulate_aces(winner_points),
                }

                self.game_stats.loc[len(self.game_stats)] = {
                    "player_id": loser,
                    "opponent_id": winner,
                    "match_id": match_id_counter,
                    "date": match_date,
                    "tournament_name": tournament_name,
                    "tournament_id": tournament_id,
                    "num_players_round": curr_round_num_players,
                    "result": "L",
                    "match_progress": match_progress,
                    "points_scored": loser_points,
                    "points_allowed": winner_points,
                    "fastest_ball_speed": self.simulate_fastest_ball_speed(),
                    "aces": self.simulate_aces(loser_points),
                }

                match_id_counter += 1
                next_round_players.append(winner)

            if curr_round_num_players == 2:
                # The round that was just played is the final, so ``winner`` and
                # ``loser`` are the champion and the runner-up, and the last
                # match id issued belongs to the final itself.
                self.tournament_stats.loc[len(self.tournament_stats)] = {
                    "winner_id": winner,
                    "runner_up_id": loser,
                    "tournament_id": tournament_id,
                    "tournament_name": tournament_name,
                    "tournament_year": tournament_year,
                    "match_id": match_id_counter - 1,
                }
            round_players = next_round_players

    def assign_init_elo(self, player_ids, player_rankings):
        """Seed Elo ratings linearly from 2700 downwards according to ranking.

        ``player_rankings[player_id - 1]`` is read as the player's rank, with
        rank 1 receiving 2700 and each further rank ``300 / len(player_ids)``
        less. Does nothing for an empty ``player_ids``.
        """
        if len(player_ids) == 0:
            # Nothing to seed; also avoids dividing by zero below.
            return
        upper_bound = 2700
        lower_bound = 2400
        delta = (upper_bound - lower_bound) / len(player_ids)
        for player_id in player_ids:
            self.player_elo[player_id] = upper_bound - (
                delta * (player_rankings[player_id - 1] - 1)
            )

    def show_top_bottom_elo_stats(self, sorted_players):
        """Log the first three and last three rows of ``sorted_players`` and the
        spread between the highest and lowest rating.

        ``sorted_players`` is loaded as two-column rows (player id, rating);
        it is presumably already sorted by rating - confirm against the caller.
        """
        player_elo_df = pd.DataFrame(
            sorted_players, columns=["Player ID", "ELO Rating"]
        )
        logger.info(
            f"\033[91mTop 3 players with highest ELO ratings are..... \n{player_elo_df.head(3)}\033[0m"
        )
        logger.info(
            f"\033[91mBottom 3 players with lowest ELO ratings are..... \n{player_elo_df.tail(3)}\033[0m"
        )
        logger.info(
            f"\033[91mDelta is..... {round(player_elo_df['ELO Rating'].max() - player_elo_df['ELO Rating'].min(), 2)}\033[0m"
        )

    def player_statistics(self, player_ids, player_info):
        """Aggregate career statistics for each player into ``player_stats``.

        ``player_info[player_id]`` must provide the ``"full_name"``,
        ``"nick_name"``, ``"country"`` and ``"dob"`` keys. One row per player
        is appended; the playing style is assigned at random.
        """
        # Loop-invariant: the names of the major tournaments.
        majors = [t[0] for t in self.major_tournaments]

        for player_id in player_ids:
            name = player_info[player_id]["full_name"]
            nickname = player_info[player_id]["nick_name"]
            country = player_info[player_id]["country"]
            dob = player_info[player_id]["dob"]
            style = "Right Hand" if random.random() > 0.3 else "Left Hand"

            player_stats = self.game_stats[self.game_stats["player_id"] == player_id]
            total_games = len(player_stats)
            total_wins = len(player_stats[player_stats["result"] == "W"])
            win_rate = total_wins / total_games if total_games > 0 else 0
            avg_points_scored = round(player_stats["points_scored"].mean(), 2)
            avg_points_allowed = round(player_stats["points_allowed"].mean(), 2)
            points_diff = round(avg_points_scored - avg_points_allowed, 2)
            fastest_ball_speed = player_stats["fastest_ball_speed"].max()
            avg_aces = round(player_stats["aces"].mean(), 2)

            # Titles per major, with an explicit 0 for majors never won.
            tournaments_won = (
                self.tournament_stats[self.tournament_stats["winner_id"] == player_id]
                .groupby("tournament_name")
                .size()
                .reindex(majors, fill_value=0)
                .to_dict()
            )
            tournaments_runner_up = (
                self.tournament_stats[
                    self.tournament_stats["runner_up_id"] == player_id
                ]
                .groupby("tournament_name")
                .size()
                .reindex(majors, fill_value=0)
                .to_dict()
            )

            # Longest runs of consecutive wins / losses in recorded order.
            win_streak, lose_streak = 0, 0
            max_win_streak, max_lose_streak = 0, 0
            for result in player_stats["result"]:
                if result == "W":
                    win_streak += 1
                    lose_streak = 0
                    max_win_streak = max(max_win_streak, win_streak)
                else:
                    lose_streak += 1
                    win_streak = 0
                    max_lose_streak = max(max_lose_streak, lose_streak)
            self.player_stats.loc[len(self.player_stats)] = [
                player_id,
                name,
                nickname,
                country,
                dob,
                style,
                self.player_elo[player_id],
                total_games,
                win_rate,
                avg_points_scored,
                avg_points_allowed,
                points_diff,
                max_win_streak,
                max_lose_streak,
                fastest_ball_speed,
                avg_aces,
                tournaments_won,
                tournaments_runner_up,
            ]

    def head_to_head_statistics(self, player_id, opponent_id):
        """Summarise the rivalry between two players from ``player_id``'s view.

        Returns a dict with both players' profile fields, current Elo ranks
        (1 = highest rating), title counts, and the record of the matches
        ``player_id`` played against ``opponent_id``. Both players must already
        have a row in ``player_stats`` and a rating in ``player_elo``.
        """
        profiles = self.player_stats
        is_player = profiles["player_id"] == player_id
        is_opponent = profiles["player_id"] == opponent_id

        player_games = self.game_stats[self.game_stats["player_id"] == player_id]
        player_name = profiles[is_player]["name"].values[0]
        opponent_name = profiles[is_opponent]["name"].values[0]
        player_country, player_dob, player_style = profiles.loc[
            is_player, ["country", "dob", "style"]
        ].iloc[0]
        opponent_country, opponent_dob, opponent_style = profiles.loc[
            is_opponent, ["country", "dob", "style"]
        ].iloc[0]

        head_to_head = player_games[player_games["opponent_id"] == opponent_id]
        total_games = len(head_to_head)
        total_wins = len(head_to_head[head_to_head["result"] == "W"])
        win_rate = total_wins / total_games if total_games > 0 else 0
        avg_points_scored = round(head_to_head["points_scored"].mean(), 2)
        avg_points_allowed = round(head_to_head["points_allowed"].mean(), 2)

        # Rank of both players by Elo rating; the table is sorted only once.
        ranking = sorted(self.player_elo.items(), key=lambda x: x[1], reverse=True)
        player_rank = ranking.index((player_id, self.player_elo[player_id])) + 1
        opponent_rank = (
            ranking.index((opponent_id, self.player_elo[opponent_id])) + 1
        )

        player_titles = profiles[is_player]["tournaments_won"].values[0]
        opponent_titles = profiles[is_opponent]["tournaments_won"].values[0]
        player_runner_up_titles = profiles[is_player][
            "tournaments_runner_up"
        ].values[0]
        opponent_runner_up_titles = profiles[is_opponent][
            "tournaments_runner_up"
        ].values[0]

        return {
            "player_id": player_id,
            "opponent_id": opponent_id,
            "player_name": player_name,
            "opponent_name": opponent_name,
            "player_country": player_country,
            "opponent_country": opponent_country,
            "player_dob": player_dob,
            "opponent_dob": opponent_dob,
            "player_style": player_style,
            "opponent_style": opponent_style,
            "total_games": total_games,
            "win_rate": win_rate,
            "avg_points_scored": avg_points_scored,
            "avg_points_allowed": avg_points_allowed,
            "head_to_head": ", ".join(head_to_head["result"].values),
            "player_rank": player_rank,
            "opponent_rank": opponent_rank,
            "player_titles": player_titles,
            "opponent_titles": opponent_titles,
            "player_runner_up_titles": player_runner_up_titles,
            "opponent_runner_up_titles": opponent_runner_up_titles,
        }


class MetricsCollector:
    def __init__(self, historical_games):
        self.events = []
        self.match_progress = []
        self.historical_games = historical_games
        self.hist_tree = self.build_tree_of_pong(historical_games)
        self.most_similar_game = None

    @staticmethod
    def scores_to_vec(scores, k):
        arr = np.full((k, 2), -1, dtype=np.int8)
        for i, s in enumerate(scores[:k]):
            l, r = map(int, s.split(":"))
            arr[i] = (l, r)
        return arr.ravel()