Skip to content

Commit cff5626

Browse files
kevinschaper and claude committed
Add granular timing for preprocessing vs upload phases
Break down timing to show:
- Preprocessing time (chunk creation)
- Upload time (parallel HTTP uploads)
- Total processing time

This helps identify whether bottlenecks are in data processing or network/Solr uploads.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 1d6781d commit cff5626

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

linkml_solr/utils/solr_bulkload.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
import os
88
from concurrent.futures import ThreadPoolExecutor, as_completed
99
import threading
10+
import time
1011
from linkml_runtime.linkml_model.meta import SchemaDefinition, SlotDefinitionName
1112
import requests
1213

@@ -204,6 +205,7 @@ def bulkload_chunked(csv_file: str,
204205
print(f"Processing {total_rows} rows in chunks of {chunk_size} with {max_workers} parallel workers")
205206

206207
total_loaded = 0
208+
preprocessing_start = time.time()
207209

208210
# Submit all chunk processing and upload tasks in parallel
209211
with ThreadPoolExecutor(max_workers=max_workers) as executor:
@@ -226,6 +228,10 @@ def bulkload_chunked(csv_file: str,
226228
)
227229
futures.append(future)
228230

231+
preprocessing_time = time.time() - preprocessing_start
232+
print(f"Preprocessing complete ({preprocessing_time:.2f}s) - starting parallel uploads...")
233+
upload_start = time.time()
234+
229235
# Process results as they complete (truly parallel!)
230236
for future in as_completed(futures):
231237
try:
@@ -234,6 +240,10 @@ def bulkload_chunked(csv_file: str,
234240
print(f"Progress: {total_loaded}/{total_rows} documents loaded")
235241
except Exception as e:
236242
print(f"Error in parallel chunk processing: {e}")
243+
244+
upload_time = time.time() - upload_start
245+
total_processing_time = time.time() - preprocessing_start
246+
print(f"Upload complete! Processing: {preprocessing_time:.2f}s, Upload: {upload_time:.2f}s, Total: {total_processing_time:.2f}s")
237247

238248
return total_loaded
239249

0 commit comments

Comments
 (0)