@@ -40,20 +40,29 @@ def _get_daily_file_path(self, date_str: str) -> Path:
40
40
41
41
Returns:
42
42
Path to the daily data file
43
+
44
+ Raises:
45
+ ValueError: If date_str is not in valid YYYY-MM-DD format
43
46
"""
44
- # Organize by year and month for better file management
47
+ import re
48
+
49
+ # Strict validation to prevent path traversal attacks
50
+ if not re .match (r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$" , date_str ):
51
+ raise ValueError (f"Invalid date format: { date_str } . Must be YYYY-MM-DD" )
52
+
53
+ # Parse and validate the date
45
54
try :
46
55
date = datetime .strptime (date_str , "%Y-%m-%d" )
47
- year = date . strftime ( "%Y" )
48
- month = date . strftime ( "%m " )
56
+ except ValueError as e :
57
+ raise ValueError ( f"Invalid date: { date_str } . { e } " )
49
58
50
- month_dir = self . daily_dir / year / month
51
- month_dir . mkdir ( parents = True , exist_ok = True )
59
+ year = date . strftime ( "%Y" )
60
+ month = date . strftime ( "%m" )
52
61
53
- return month_dir / f" { date_str } .json"
54
- except ValueError :
55
- # Fallback for invalid date formats
56
- return self . daily_dir / f"{ date_str } .json"
62
+ month_dir = self . daily_dir / year / month
63
+ month_dir . mkdir ( parents = True , exist_ok = True )
64
+
65
+ return month_dir / f"{ date_str } .json"
57
66
58
67
def save_daily_data (
59
68
self , daily_data : List [Dict [str , Any ]], overwrite : bool = False
@@ -85,34 +94,55 @@ def save_daily_data(
85
94
if file_path .exists () and not overwrite :
86
95
# Load existing data to check if it needs updating
87
96
try :
88
- with open (file_path , "r" ) as f :
97
+ with open (file_path , "r" , encoding = "utf-8" ) as f :
89
98
existing_data = json .load (f )
90
99
91
100
# If the data is identical, skip
92
101
if existing_data == day_data :
93
102
self ._saved_dates .add (date_str )
94
103
continue
95
104
96
- # If existing data has more information, keep it
97
- existing_tokens = existing_data .get (
98
- "input_tokens" , 0
99
- ) + existing_data .get ("output_tokens" , 0 )
100
- new_tokens = day_data .get ("input_tokens" , 0 ) + day_data .get (
101
- "output_tokens" , 0
105
+ # Compare total information to decide which data to keep
106
+ # Sum all token counts for comparison
107
+ existing_total_tokens = (
108
+ existing_data .get ("input_tokens" , 0 )
109
+ + existing_data .get ("output_tokens" , 0 )
110
+ + existing_data .get ("cache_creation_tokens" , 0 )
111
+ + existing_data .get ("cache_read_tokens" , 0 )
112
+ )
113
+ new_total_tokens = (
114
+ day_data .get ("input_tokens" , 0 )
115
+ + day_data .get ("output_tokens" , 0 )
116
+ + day_data .get ("cache_creation_tokens" , 0 )
117
+ + day_data .get ("cache_read_tokens" , 0 )
102
118
)
103
119
104
- if existing_tokens >= new_tokens :
120
+ # Compare entries count and cost
121
+ existing_entries = existing_data .get ("entries_count" , 0 )
122
+ new_entries = day_data .get ("entries_count" , 0 )
123
+ existing_cost = existing_data .get ("total_cost" , 0.0 )
124
+ new_cost = day_data .get ("total_cost" , 0.0 )
125
+
126
+ # Keep existing only if it has strictly more total tokens AND at least as many entries AND at least as high a cost
127
+ # This ensures we don't lose any valuable information
128
+ if (
129
+ existing_total_tokens > new_total_tokens
130
+ and existing_entries >= new_entries
131
+ and existing_cost >= new_cost
132
+ ):
105
133
self ._saved_dates .add (date_str )
106
134
continue
107
135
136
+ # Otherwise, fall through and save the new data (it has at least as many tokens, or more entries, or a higher cost)
137
+
108
138
except Exception as e :
109
139
logger .warning (f"Error reading existing data for { date_str } : { e } " )
110
140
111
141
# Save the data
112
142
try :
113
143
temp_file = file_path .with_suffix (".tmp" )
114
- with open (temp_file , "w" ) as f :
115
- json .dump (day_data , f , indent = 2 , default = str )
144
+ with open (temp_file , "w" , encoding = "utf-8" ) as f :
145
+ json .dump (day_data , f , indent = 2 , default = str , ensure_ascii = False )
116
146
temp_file .replace (file_path )
117
147
118
148
self ._saved_dates .add (date_str )
@@ -198,7 +228,7 @@ def load_historical_daily_data(
198
228
continue
199
229
200
230
# Load the data
201
- with open (file_path , "r" ) as f :
231
+ with open (file_path , "r" , encoding = "utf-8" ) as f :
202
232
data = json .load (f )
203
233
historical_data .append (data )
204
234
0 commit comments