
fix: dia-1108: ensure all messages left in the output topic are sent to LSE and no predictions are lost (#111)

Co-authored-by: Forum Gala <[email protected]>
forum-hs and Forum Gala authored May 8, 2024
1 parent 0f9aa91 commit b20edc1
Showing 2 changed files with 28 additions and 28 deletions.
adala/runtimes/_openai.py — 10 changes: 5 additions & 5 deletions
@@ -367,10 +367,10 @@ async def batch_to_batch(
             # check for errors - if any, append to outputs and continue
             if response.get("error"):
                 # FIXME if we collect failed and succeeded outputs in the same list -> df, we end up with an awkward schema like this:
-                # output error message details
-                # ---------------------------
-                # output1 nan nan nan
-                # nan true message2 details2
+                # output error message details
+                # ---------------------------
+                # output1 nan nan nan
+                # nan true message2 details2
                 # we are not going to send the error response to lse
                 # outputs.append(response)
                 if self.verbose:
@@ -392,7 +392,7 @@ async def batch_to_batch(
         # TODO: note that this doesn't work for multiple output fields e.g. `Output {output1} and Output {output2}`
         output_df = InternalDataFrame(outputs)
         # return output dataframe indexed as input batch.index, assuming outputs are in the same order as inputs
-        return output_df.set_index('index')
+        return output_df.set_index("index")

     async def record_to_record(
         self,
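For context on the _openai.py change, here is a minimal sketch (illustrative only: plain pandas stands in for adala's InternalDataFrame, and the example values are made up) of the awkward mixed schema the FIXME warns about, and of how collecting only successful outputs and restoring their stored index keeps predictions aligned with the input batch:

import pandas as pd

# Concatenating a success row and an error row into one DataFrame pads each row
# with NaN for the columns it does not have -- the awkward schema from the FIXME:
mixed = pd.DataFrame([
    {"output": "output1"},
    {"error": True, "message": "message2", "details": "details2"},
])
# mixed looks roughly like:
#     output  error   message   details
# 0  output1    NaN       NaN       NaN
# 1      NaN   True  message2  details2

# Instead, only successful outputs are collected, each carrying the index of the
# input row it belongs to, and the DataFrame is re-indexed by that column so the
# result lines up with the original batch even when some rows errored out:
outputs = [
    {"index": 0, "output": "output1"},
    {"index": 2, "output": "output3"},
]
output_df = pd.DataFrame(outputs).set_index("index")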
server/tasks/process_file.py — 46 changes: 23 additions & 23 deletions
@@ -43,7 +43,7 @@ def process_file(agent: Agent):
     name="streaming_parent_task", track_started=True, bind=True, serializer="pickle"
 )
 def streaming_parent_task(
-    self, agent: Agent, result_handler: ResultHandler, batch_size: int = 2
+    self, agent: Agent, result_handler: ResultHandler, batch_size: int = 10
 ):
     """
     This task is used to launch the two tasks that are doing the real work, so that
@@ -155,30 +155,30 @@ async def async_process_streaming_output(
     input_job_running = True

+    data = await consumer.getmany(timeout_ms=3000, max_records=batch_size)
+
     while input_job_running:
-        try:
-            data = await consumer.getmany(timeout_ms=3000, max_records=batch_size)
-            for tp, messages in data.items():
-                if messages:
-                    logger.debug(f"Handling {messages=} in topic {tp.topic}")
-                    data = [msg.value for msg in messages]
-                    result_handler(data)
-                    logger.debug(
-                        f"Handled {len(messages)} messages in topic {tp.topic}"
-                    )
-                else:
-                    logger.debug(f"No messages in topic {tp.topic}")
-
-            if not data:
-                logger.info(f"No messages in any topic")
-        finally:
-            job = process_file_streaming.AsyncResult(input_job_id)
-            # TODO no way to recover here if connection to main app is lost, job will be stuck at "PENDING" so this will loop forever
-            if job.status in ["SUCCESS", "FAILURE", "REVOKED"]:
-                input_job_running = False
-                logger.info(f"Input job done, stopping output job")
+        for tp, messages in data.items():
+            if messages:
+                logger.debug(f"Handling {messages=} in topic {tp.topic}")
+                data = [msg.value for msg in messages]
+                result_handler(data)
+                logger.debug(f"Handled {len(messages)} messages in topic {tp.topic}")
             else:
-                logger.info(f"Input job still running, keeping output job running")
+                logger.debug(f"No messages in topic {tp.topic}")

+        if not data:
+            logger.info(f"No messages in any topic")
+
+        job = process_file_streaming.AsyncResult(input_job_id)
+        # we are getting packets from the output topic here to check if its empty and continue processing if its not
+        data = await consumer.getmany(timeout_ms=3000, max_records=batch_size)
+        # TODO no way to recover here if connection to main app is lost, job will be stuck at "PENDING" so this will loop forever
+        if job.status in ["SUCCESS", "FAILURE", "REVOKED"] and len(data.items()) == 0:
+            input_job_running = False
+            logger.info(f"Input job done, stopping output job")
+        else:
+            logger.info(f"Input job still running, keeping output job running")

     await consumer.stop()
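The process_file.py change is easier to see as a standalone pattern. Below is a minimal sketch, assuming aiokafka's AIOKafkaConsumer (which matches the getmany/stop calls in the diff); drain_output_topic, result_handler, and input_job_is_done are stand-ins for this illustration, not the repository's actual API:

from typing import Callable, List

from aiokafka import AIOKafkaConsumer  # assumed dependency, matching the consumer calls above


async def drain_output_topic(
    consumer: AIOKafkaConsumer,
    result_handler: Callable[[List[dict]], None],
    input_job_is_done: Callable[[], bool],
    batch_size: int = 10,
) -> None:
    """Forward every record from the output topic, including records that
    arrive right before the input (producer) job finishes."""
    data = await consumer.getmany(timeout_ms=3000, max_records=batch_size)
    running = True
    while running:
        # Hand whatever was already fetched to the result handler.
        for tp, messages in data.items():
            if messages:
                result_handler([msg.value for msg in messages])
        # Poll again *before* deciding to stop: the loop only exits once the
        # producer job is finished AND this poll came back empty, so records
        # still sitting in the topic when the job ended are not dropped.
        job_done = input_job_is_done()
        data = await consumer.getmany(timeout_ms=3000, max_records=batch_size)
        if job_done and not data:
            running = False
    await consumer.stop()

Checking the job status before the extra poll mirrors the ordering in the diff: if the producer finishes between the two calls, the loop still runs one more iteration and drains whatever that poll returned.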
