From dc9a52d92c1dd63ae94838f4d5a1a9d7d08d3f49 Mon Sep 17 00:00:00 2001 From: George Pantazopoulos Date: Tue, 16 May 2023 14:28:25 +0000 Subject: [PATCH 1/8] feat: fair eval mode --- docker/docker-compose.yaml | 2 ++ .../common/settings/__init__.py | 2 +- .../common/settings/simbot.py | 2 +- src/emma_experience_hub/constants/simbot.py | 2 ++ .../constants/simbot/registry.yaml | 4 ++-- .../datamodels/simbot/agent_memory.py | 16 ++-------------- .../datamodels/simbot/session.py | 2 +- 7 files changed, 11 insertions(+), 19 deletions(-) diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 4737127b..cb1047a2 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -54,6 +54,7 @@ services: LOG_LEVEL: debug MODEL_NAME: "heriot-watt/emma-base" MODEL_CHECKPOINT_PATH: "/app/model/${INTENT_EXTRACTOR_MODEL}" + ENABLE_PREDICTION_PATCHING: False ports: - "5501:6000" healthcheck: @@ -69,6 +70,7 @@ services: environment: LOG_LEVEL: debug MODEL_CHECKPOINT_PATH: "/app/model/${INSTRUCTION_PREDICTOR_MODEL}" + ENABLE_PREDICTION_PATCHING: False ports: - "5502:6000" healthcheck: diff --git a/src/emma_experience_hub/common/settings/__init__.py b/src/emma_experience_hub/common/settings/__init__.py index c5ff52a8..5c051cba 100644 --- a/src/emma_experience_hub/common/settings/__init__.py +++ b/src/emma_experience_hub/common/settings/__init__.py @@ -1,2 +1,2 @@ from emma_experience_hub.common.settings.settings import Settings -from emma_experience_hub.common.settings.simbot import SimBotFeatureFlags, SimBotSettings +from emma_experience_hub.common.settings.simbot import SimBotSettings diff --git a/src/emma_experience_hub/common/settings/simbot.py b/src/emma_experience_hub/common/settings/simbot.py index 9877cc47..c395dab5 100644 --- a/src/emma_experience_hub/common/settings/simbot.py +++ b/src/emma_experience_hub/common/settings/simbot.py @@ -23,7 +23,7 @@ class SimBotFeatureArgs(BaseModel): class SimBotFeatureFlags(BaseModel): """Feature flags for the SimBot agent.""" - enable_offline_evaluation: bool = False + enable_offline_evaluation: bool = True enable_always_highlight_before_object_action: bool = False enable_clarification_questions: bool = True diff --git a/src/emma_experience_hub/constants/simbot.py b/src/emma_experience_hub/constants/simbot.py index ff3f7284..4cbc5f99 100644 --- a/src/emma_experience_hub/constants/simbot.py +++ b/src/emma_experience_hub/constants/simbot.py @@ -95,6 +95,7 @@ def get_feedback_rules() -> list[dict[str, str]]: @lru_cache(maxsize=1) def get_prior_memory() -> dict[str, str]: """Load prior memory of object location in rooms from file.""" + return {} json_path = constants_absolute_path.joinpath("simbot", "prior_memory.json") with open(json_path) as json_file: return json.load(json_file)["fixed_locations"] @@ -103,6 +104,7 @@ def get_prior_memory() -> dict[str, str]: @lru_cache(maxsize=1) def get_prior_memory_candidates() -> dict[str, list[str]]: """Load prior memory of object location in candidate rooms from file.""" + return {} json_path = constants_absolute_path.joinpath("simbot", "prior_memory.json") with open(json_path) as json_file: return json.load(json_file)["candidate_locations"] diff --git a/src/emma_experience_hub/constants/simbot/registry.yaml b/src/emma_experience_hub/constants/simbot/registry.yaml index 01fc4bc5..01574d5f 100644 --- a/src/emma_experience_hub/constants/simbot/registry.yaml +++ b/src/emma_experience_hub/constants/simbot/registry.yaml @@ -11,13 +11,13 @@ services: - name: intent_extractor image_repository_uri: 020417957102.dkr.ecr.us-east-1.amazonaws.com/emma/policy - image_version: 1.45.6-cu113 + image_version: 1.45.7-cu113 model_url: s3://emma-simbot/model/checkpoints/simbot_nlu/simbot_nlu_batch12_epoch30.ckpt - name: instruction_predictor image_repository_uri: 020417957102.dkr.ecr.us-east-1.amazonaws.com/emma/policy - image_version: 1.45.6-cu113 + image_version: 1.45.7-cu113 model_url: s3://emma-simbot/model/checkpoints/simbot_action/action_cdf_batch12.ckpt diff --git a/src/emma_experience_hub/datamodels/simbot/agent_memory.py b/src/emma_experience_hub/datamodels/simbot/agent_memory.py index 12c51d4f..6a21fbd7 100644 --- a/src/emma_experience_hub/datamodels/simbot/agent_memory.py +++ b/src/emma_experience_hub/datamodels/simbot/agent_memory.py @@ -2,7 +2,6 @@ from pydantic import BaseModel, Field -from emma_experience_hub.common.settings import SimBotFeatureFlags from emma_experience_hub.constants.simbot import get_prior_memory, get_prior_memory_candidates from emma_experience_hub.datamodels import EmmaExtractedFeatures from emma_experience_hub.datamodels.common import ArenaLocation, Position, RotationQuaternion @@ -36,19 +35,8 @@ class SimBotObjectMemory(BaseModel): """Track all the observed objects and their closest viewpoints.""" memory: dict[str, SimBotRoomMemoryType] = {} - _prior_memory: dict[str, str] = {} - _prior_memory_candidates: dict[str, list[str]] = {} - - @classmethod - def from_simbot_feature_flags(cls) -> "SimBotObjectMemory": - """Instantiate the prior memory depending on the feature flags.""" - flags = SimBotFeatureFlags() - if flags.enable_prior_memory: - return cls( - _prior_memory=get_prior_memory(), - _prior_memory_candidates=get_prior_memory_candidates(), - ) - return cls() + _prior_memory: dict[str, str] = get_prior_memory() + _prior_memory_candidates: dict[str, list[str]] = get_prior_memory_candidates() def update_from_action( # noqa: WPS231 self, diff --git a/src/emma_experience_hub/datamodels/simbot/session.py b/src/emma_experience_hub/datamodels/simbot/session.py index 084aaa1f..c6fdfa18 100644 --- a/src/emma_experience_hub/datamodels/simbot/session.py +++ b/src/emma_experience_hub/datamodels/simbot/session.py @@ -303,7 +303,7 @@ class SimBotSessionState(BaseModel, validate_assignment=True): utterance_queue: SimBotQueue[SimBotQueueUtterance] = SimBotQueue[SimBotQueueUtterance]() find_queue: SimBotQueue[SimBotAction] = SimBotQueue[SimBotAction]() inventory: SimBotInventory = SimBotInventory() - memory: SimBotObjectMemory = SimBotObjectMemory.from_simbot_feature_flags() + memory: SimBotObjectMemory = SimBotObjectMemory() last_user_utterance: SimBotQueue[str] = SimBotQueue[str]() From 768f0e2d714189a5c5e40edefd785cbd67e5fa39 Mon Sep 17 00:00:00 2001 From: George Pantazopoulos Date: Tue, 23 May 2023 15:59:30 +0000 Subject: [PATCH 2/8] feat: update registry --- src/emma_experience_hub/constants/simbot/registry.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/emma_experience_hub/constants/simbot/registry.yaml b/src/emma_experience_hub/constants/simbot/registry.yaml index 01574d5f..5b71b0cb 100644 --- a/src/emma_experience_hub/constants/simbot/registry.yaml +++ b/src/emma_experience_hub/constants/simbot/registry.yaml @@ -11,15 +11,15 @@ services: - name: intent_extractor image_repository_uri: 020417957102.dkr.ecr.us-east-1.amazonaws.com/emma/policy - image_version: 1.45.7-cu113 + image_version: 1.47.1-cu113 - model_url: s3://emma-simbot/model/checkpoints/simbot_nlu/simbot_nlu_batch12_epoch30.ckpt + model_url: s3://emma-simbot/model/checkpoints/simbot_combined/combined_human_vision_aug_dowsampled_cdf_aug_lr0.0001.ckpt - name: instruction_predictor image_repository_uri: 020417957102.dkr.ecr.us-east-1.amazonaws.com/emma/policy - image_version: 1.45.7-cu113 + image_version: 1.47.1-cu113 - model_url: s3://emma-simbot/model/checkpoints/simbot_action/action_cdf_batch12.ckpt + model_url: s3://emma-simbot/model/checkpoints/simbot_combined/combined_human_vision_aug_dowsampled_cdf_aug_lr0.0001.ckpt - name: feature_extractor image_repository_uri: 020417957102.dkr.ecr.us-east-1.amazonaws.com/emma/perception From 55072049d953d9f9967f7b6aca147aef4c311cd7 Mon Sep 17 00:00:00 2001 From: George Pantazopoulos Date: Sat, 2 Dec 2023 09:11:27 +0000 Subject: [PATCH 3/8] feat: add local db --- .../api/clients/simbot/__init__.py | 1 + .../api/clients/simbot/cache.py | 4 +- .../api/clients/simbot/session_local_db.py | 179 ++++++++++++++++++ .../api/controllers/simbot/clients.py | 11 +- .../api/controllers/simbot/controller.py | 2 + .../api/controllers/simbot/pipelines.py | 2 + src/emma_experience_hub/api/simbot.py | 2 +- .../common/settings/simbot.py | 5 + .../instruction_handler.py | 19 +- .../simbot/agent_action_generation.py | 12 +- .../simbot/agent_intent_selection.py | 2 + .../pipelines/simbot/request_processing.py | 3 +- 12 files changed, 226 insertions(+), 16 deletions(-) create mode 100644 src/emma_experience_hub/api/clients/simbot/session_local_db.py diff --git a/src/emma_experience_hub/api/clients/simbot/__init__.py b/src/emma_experience_hub/api/clients/simbot/__init__.py index 700cbe83..3a6b5181 100644 --- a/src/emma_experience_hub/api/clients/simbot/__init__.py +++ b/src/emma_experience_hub/api/clients/simbot/__init__.py @@ -11,3 +11,4 @@ from emma_experience_hub.api.clients.simbot.placeholder_vision import SimBotPlaceholderVisionClient from emma_experience_hub.api.clients.simbot.qa_intent import SimBotQAIntentClient from emma_experience_hub.api.clients.simbot.session_db import SimBotSessionDbClient +from emma_experience_hub.api.clients.simbot.session_local_db import SimBotSQLLiteClient diff --git a/src/emma_experience_hub/api/clients/simbot/cache.py b/src/emma_experience_hub/api/clients/simbot/cache.py index 77543008..6397431f 100644 --- a/src/emma_experience_hub/api/clients/simbot/cache.py +++ b/src/emma_experience_hub/api/clients/simbot/cache.py @@ -43,7 +43,7 @@ def __init__( self.prefix = object_prefix if object_prefix is not None else "" self._local_cache_dir = local_cache_dir - self._s3 = S3Client(local_cache_dir=self._local_cache_dir) + # self._s3 = S3Client(local_cache_dir=self._local_cache_dir) def healthcheck(self) -> bool: """Healthcheck for the client.""" @@ -63,6 +63,8 @@ def load(self, session_id: str, prediction_request_id: str) -> T: def upload_to_s3(self, session_id: str, prediction_request_id: str) -> None: """Load the cached data to S3.""" + # Dont upload anything to s3 + return None cached_data = self._load_bytes(session_id, prediction_request_id) s3_path = self._create_s3_path(session_id, prediction_request_id) s3_path.write_bytes(cached_data) diff --git a/src/emma_experience_hub/api/clients/simbot/session_local_db.py b/src/emma_experience_hub/api/clients/simbot/session_local_db.py new file mode 100644 index 00000000..a9ce95ec --- /dev/null +++ b/src/emma_experience_hub/api/clients/simbot/session_local_db.py @@ -0,0 +1,179 @@ +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path + +import sqlite3 +from loguru import logger + +from emma_experience_hub.api.clients.client import Client +from emma_experience_hub.datamodels.simbot import SimBotSessionTurn + + +class SimBotSQLLiteClient: + """Local Client for storing SimBot session data.""" + + primary_key: str + sort_key: str + data_key: str + + def __init__(self, db_file: Path) -> None: + self._db_file = db_file + self.create_table() + + def create_table(self) -> None: # noqa: WPS231 + """Create table.""" + if self._db_file.exists(): + return + + try: # noqa: WPS229 + connection = sqlite3.connect(self._db_file) + sqlite_create_table_query = """CREATE TABLE session_table ( + primary_key TEXT NOT NULL, + sort_key INTEGER NOT NULL, + data_key TEXT NOT NULL, + PRIMARY KEY (primary_key, sort_key) + );""" + + cursor = connection.cursor() + cursor.execute(sqlite_create_table_query) + connection.commit() + logger.info("SQLite table created") + + cursor.close() + + except sqlite3.Error as error: + logger.exception("Error while creating a sqlite table", error) + finally: + if connection: + connection.close() + + def healthcheck(self) -> bool: + """Verify that the DB can be accessed and that it is ready.""" + try: + sqlite3.connect(self._db_file) + except Exception: + logger.exception("Cannot find db table") + return False + + return True + + def add_session_turn(self, session_turn: SimBotSessionTurn) -> None: + """Add a session turn to the table.""" + try: # noqa: WPS229 + connection = sqlite3.connect(self._db_file) + cursor = connection.cursor() + + sqlite_insert_with_param = """INSERT OR REPLACE INTO session_table + (primary_key, sort_key, data_key) + VALUES (?, ?, ?);""" + + data_tuple = ( + session_turn.session_id, + session_turn.idx, + session_turn.json(by_alias=True), + ) + cursor.execute(sqlite_insert_with_param, data_tuple) + connection.commit() + logger.info("Successfully inserted turn into table") + + cursor.close() + + except sqlite3.Error as error: + logger.exception("Failed to insert turn into table") + raise error + finally: + if connection: + connection.close() + + def put_session_turn(self, session_turn: SimBotSessionTurn) -> None: + """Put a session turn to the table. + + If the turn already exists, it WILL overwrite it. + """ + self.add_session_turn(session_turn) + + def get_session_turn(self, session_id: str, idx: int) -> SimBotSessionTurn: + """Get the session turn from the table.""" + try: # noqa: WPS229 + connection = sqlite3.connect(self._db_file) + cursor = connection.cursor() + + sql_select_query = "select * from session_table where primary_key = ? and sort_key = ?" + cursor.execute(sql_select_query, (session_id, idx)) + turn = cursor.fetchone() + cursor.close() + + except sqlite3.Error as error: + logger.exception("Failed to read data from table") + raise error + finally: + if connection: + connection.close() + + return SimBotSessionTurn.parse_raw(turn[2]) + + def get_all_session_turns(self, session_id: str) -> list[SimBotSessionTurn]: + """Get all the turns for a given session.""" + try: + all_raw_turns = self._get_all_session_turns(session_id) + except Exception as query_err: + logger.exception("Could not query for session turns") + raise query_err + + with ThreadPoolExecutor() as thread_pool: + # Try parse everything and hope it doesn't crash + try: + parsed_responses = list( + thread_pool.map( + SimBotSessionTurn.parse_raw, + (response_item[2] for response_item in all_raw_turns), + ) + ) + except Exception: + logger.exception( + "Could not parse session turns from response. Returning an empty list." + ) + return [] + + logger.debug(f"Successfully got previous `{len(parsed_responses)}` turns") + + # Sort the responses by the sort key before returning + sorted_responses = sorted(parsed_responses, key=lambda turn: turn.idx) + + return sorted_responses + + def _get_all_session_turns(self, session_id: str) -> list[tuple[str, int, str]]: + try: # noqa: WPS229 + connection = sqlite3.connect(self._db_file) + cursor = connection.cursor() + + sql_select_query = ( + "select * from session_table where primary_key = ? ORDER BY sort_key" + ) + cursor.execute(sql_select_query, (session_id,)) + turns = cursor.fetchall() + cursor.close() + + except sqlite3.Error as error: + logger.exception("Failed to read data from table") + raise error + finally: + if connection: + connection.close() + + return turns + + +# db = SimBotSQLLiteClient( +# Path("/home/gmp2000/simbot-offline-inference/storage/experience-hub/storage/local_sessions.db") +# ) + +# session_turns = db.get_all_session_turns("T1_39786908-bee2-48a5-bf33-6640de5b80e8") + +# for turn in session_turns: +# print(turn.prediction_request_id) +# if turn.speech: +# print(turn.speech.original_utterance.utterance, turn.speech.modified_utterance.utterance) +# print(turn.actions.interaction.raw_output) +# if turn.actions.interaction.status: +# print(turn.actions.interaction.status.success) +# breakpoint() diff --git a/src/emma_experience_hub/api/controllers/simbot/clients.py b/src/emma_experience_hub/api/controllers/simbot/clients.py index 4d9ea245..1117db05 100644 --- a/src/emma_experience_hub/api/controllers/simbot/clients.py +++ b/src/emma_experience_hub/api/controllers/simbot/clients.py @@ -5,6 +5,7 @@ from loguru import logger from pydantic import BaseModel +from pathlib import Path from emma_experience_hub.api.clients import ( Client, @@ -24,6 +25,7 @@ SimBotPlaceholderVisionClient, SimBotQAIntentClient, SimBotSessionDbClient, + SimBotSQLLiteClient, ) from emma_experience_hub.common.settings import SimBotSettings @@ -36,7 +38,7 @@ class SimBotControllerClients(BaseModel, arbitrary_types_allowed=True): features: SimBotFeaturesClient nlu_intent: SimBotNLUIntentClient action_predictor: SimbotActionPredictionClient - session_db: SimBotSessionDbClient + session_db: SimBotSQLLiteClient profanity_filter: ProfanityFilterClient out_of_domain_detector: OutOfDomainDetectorClient confirmation_response_classifier: ConfirmationResponseClassifierClient @@ -66,9 +68,8 @@ def from_simbot_settings(cls, simbot_settings: SimBotSettings) -> "SimBotControl timeout=simbot_settings.client_timeout, ), ), - session_db=SimBotSessionDbClient( - resource_region=simbot_settings.session_db_region, - table_name=simbot_settings.session_db_memory_table_name, + session_db=SimBotSQLLiteClient( + db_file=Path(simbot_settings.session_local_db_file), ), nlu_intent=SimBotNLUIntentClient( endpoint=simbot_settings.nlu_predictor_url, @@ -100,6 +101,7 @@ def from_simbot_settings(cls, simbot_settings: SimBotSettings) -> "SimBotControl simbot_hacks=SimBotHacksClient( endpoint=simbot_settings.simbot_hacks_url, timeout=simbot_settings.client_timeout, + disable=not simbot_settings.feature_flags.enable_simbot_raw_text_match, ), simbot_qa=SimBotQAIntentClient( endpoint=simbot_settings.simbot_qa_url, @@ -147,7 +149,6 @@ def _break_from_sleep(self, signum: int, _frame: Any) -> None: def _healthcheck_all_clients(self) -> bool: """Check all the clients are healthy and running.""" clients: list[Client] = list(self.dict().values()) - for client in clients: if not client.healthcheck(): logger.exception(f"Failed to verify the healthcheck for client `{client}`") diff --git a/src/emma_experience_hub/api/controllers/simbot/controller.py b/src/emma_experience_hub/api/controllers/simbot/controller.py index c5085d3a..cae45bf6 100644 --- a/src/emma_experience_hub/api/controllers/simbot/controller.py +++ b/src/emma_experience_hub/api/controllers/simbot/controller.py @@ -229,6 +229,8 @@ def upload_cache_to_s3(self, session_id: str, prediction_request_id: str) -> Non If the file does not exist, we don't care so just suppress that exception. """ + # Dont upload anything to s3 + return None with suppress(FileNotFoundError): self.clients.features.auxiliary_metadata_cache_client.upload_to_s3( session_id, prediction_request_id diff --git a/src/emma_experience_hub/api/controllers/simbot/pipelines.py b/src/emma_experience_hub/api/controllers/simbot/pipelines.py index 9419701e..44553b17 100644 --- a/src/emma_experience_hub/api/controllers/simbot/pipelines.py +++ b/src/emma_experience_hub/api/controllers/simbot/pipelines.py @@ -104,6 +104,7 @@ def from_clients( _enable_search_after_no_match=simbot_settings.feature_flags.enable_search_after_no_match, _enable_search_after_missing_inventory=simbot_settings.feature_flags.enable_search_after_missing_inventory, _enable_high_level_planner=simbot_settings.feature_flags.enable_rasa_high_level_planner, + _enable_simbot_raw_text_match=simbot_settings.feature_flags.enable_simbot_raw_text_match, ), agent_action_generator=SimBotAgentActionGenerationPipeline( features_client=clients.features, @@ -112,6 +113,7 @@ def from_clients( previous_action_parser=SimBotPreviousActionParser(), find_object_pipeline=find_object, simbot_hacks_client=clients.simbot_hacks, + _enable_simbot_raw_text_match=simbot_settings.feature_flags.enable_simbot_raw_text_match, ), agent_language_generator=SimBotAgentLanguageGenerationPipeline( prevent_default_response_as_lightweight=simbot_settings.feature_flags.prevent_default_response_as_lightweight diff --git a/src/emma_experience_hub/api/simbot.py b/src/emma_experience_hub/api/simbot.py index 99043685..cfa19a28 100644 --- a/src/emma_experience_hub/api/simbot.py +++ b/src/emma_experience_hub/api/simbot.py @@ -24,7 +24,7 @@ async def startup_event() -> None: """Handle the startup of the API.""" simbot_settings = SimBotSettings.from_env() - boto3.setup_default_session(profile_name=simbot_settings.aws_profile) + # boto3.setup_default_session(profile_name=simbot_settings.aws_profile) state["controller"] = SimBotController.from_simbot_settings(simbot_settings) diff --git a/src/emma_experience_hub/common/settings/simbot.py b/src/emma_experience_hub/common/settings/simbot.py index b420e115..1378d4d0 100644 --- a/src/emma_experience_hub/common/settings/simbot.py +++ b/src/emma_experience_hub/common/settings/simbot.py @@ -42,6 +42,7 @@ class SimBotFeatureFlags(BaseModel): enable_search_after_missing_inventory: bool = True enable_search_after_no_match: bool = True enable_simbot_qa: bool = True + enable_simbot_raw_text_match: bool = True prevent_default_response_as_lightweight: bool = True @@ -69,6 +70,7 @@ def set_offline_evaluation_flags( values["enable_search_after_missing_inventory"] = False values["enable_simbot_qa"] = False values["prevent_default_response_as_lightweight"] = False + values["enable_simbot_raw_text_match"] = False values["gfh_location_type"] = GFHLocationType.viewpoint return values @@ -100,6 +102,9 @@ class SimBotSettings(BaseSettings): extracted_features_cache_dir: DirectoryPath session_db_memory_table_name: str = "SIMBOT_MEMORY_TABLE" + session_local_db_file: str = ( + "/home/gmp2000/simbot-offline-inference/storage/experience-hub/storage/local_sessions.db" + ) session_db_region: str = "us-east-1" feature_extractor_url: AnyHttpUrl = AnyHttpUrl(url=f"{scheme}://0.0.0.0:5500", scheme=scheme) diff --git a/src/emma_experience_hub/functions/simbot/agent_intent_selection/instruction_handler.py b/src/emma_experience_hub/functions/simbot/agent_intent_selection/instruction_handler.py index 1b9b608d..64561a7e 100644 --- a/src/emma_experience_hub/functions/simbot/agent_intent_selection/instruction_handler.py +++ b/src/emma_experience_hub/functions/simbot/agent_intent_selection/instruction_handler.py @@ -16,6 +16,7 @@ SimBotNLUIntentType, SimBotSession, SimBotUserSpeech, + SimBotUtterance, ) from emma_experience_hub.datamodels.simbot.queue import SimBotQueueUtterance from emma_experience_hub.parsers import NeuralParser @@ -39,6 +40,7 @@ def __init__( _enable_search_after_no_match: bool = True, _enable_search_after_missing_inventory: bool = True, _enable_high_level_planner: bool = True, + _enable_simbot_raw_text_match: bool = True, ) -> None: self._features_client = features_client @@ -54,6 +56,7 @@ def __init__( self._enable_search_after_no_match = _enable_search_after_no_match self._enable_missing_inventory = _enable_search_after_missing_inventory self._enable_high_level_planner = _enable_high_level_planner + self._enable_simbot_raw_text_match = _enable_simbot_raw_text_match def run(self, session: SimBotSession) -> Optional[SimBotAgentIntents]: """Get the agent intent.""" @@ -68,9 +71,10 @@ def run(self, session: SimBotSession) -> Optional[SimBotAgentIntents]: if self._enable_high_level_planner: session = self._compound_splitter_pipeline.run_high_level_planner(session) # Check if the utterance matches one of the known templates - intents = self._process_utterance_with_raw_text_matcher(session) - if intents is not None: - return intents + if self._enable_simbot_raw_text_match: + intents = self._process_utterance_with_raw_text_matcher(session) + if intents is not None: + return intents # Otherwise, use the NLU to detect it intents = self._process_utterance_with_nlu(session) @@ -165,6 +169,15 @@ def _process_utterance_with_nlu(self, session: SimBotSession) -> SimBotAgentInte session.update_agent_memory(extracted_features) logger.debug(f"Extracted intent: {intent}") + if not intent.type.triggers_question_to_user: + new_utterance = session.current_turn.speech.utterance.split("<>")[0].strip() + session.current_turn.speech = SimBotUserSpeech( + modified_utterance=SimBotUtterance( + utterance=new_utterance, role=session.current_turn.speech.role + ), + original_utterance=session.current_turn.speech.original_utterance, + ) + if not self._enable_clarification_questions and intent.type.triggers_question_to_user: logger.info( "Clarification questions are disabled; returning the `` intent." diff --git a/src/emma_experience_hub/pipelines/simbot/agent_action_generation.py b/src/emma_experience_hub/pipelines/simbot/agent_action_generation.py index 4e481e96..d0e8638f 100644 --- a/src/emma_experience_hub/pipelines/simbot/agent_action_generation.py +++ b/src/emma_experience_hub/pipelines/simbot/agent_action_generation.py @@ -43,6 +43,7 @@ def __init__( previous_action_parser: SimBotPreviousActionParser, find_object_pipeline: SimBotFindObjectPipeline, simbot_hacks_client: SimBotHacksClient, + _enable_simbot_raw_text_match: bool = True, ) -> None: self._features_client = features_client self._action_predictor_client = action_predictor_client @@ -51,6 +52,7 @@ def __init__( self._simbot_hacks_client = simbot_hacks_client self._find_object_pipeline = find_object_pipeline self._viewpoint_action_planner = ViewpointPlanner() + self._enable_simbot_raw_text_match = _enable_simbot_raw_text_match def run(self, session: SimBotSession) -> Optional[SimBotAction]: """Generate an action to perform on the environment.""" @@ -76,10 +78,12 @@ def run(self, session: SimBotSession) -> Optional[SimBotAction]: def handle_act_intent(self, session: SimBotSession) -> Optional[SimBotAction]: """Generate an action when we want to just act.""" - try: - return self._predict_action_from_template_matching(session) - except Exception: - return self._predict_action_from_emma_policy(session) + if self._enable_simbot_raw_text_match: + try: + return self._predict_action_from_template_matching(session) + except Exception: + return self._predict_action_from_emma_policy(session) + return self._predict_action_from_emma_policy(session) def handle_act_previous_intent(self, session: SimBotSession) -> Optional[SimBotAction]: """Get the action from the previous turn.""" diff --git a/src/emma_experience_hub/pipelines/simbot/agent_intent_selection.py b/src/emma_experience_hub/pipelines/simbot/agent_intent_selection.py index a0d07c6b..9323d560 100644 --- a/src/emma_experience_hub/pipelines/simbot/agent_intent_selection.py +++ b/src/emma_experience_hub/pipelines/simbot/agent_intent_selection.py @@ -57,6 +57,7 @@ def __init__( _enable_search_after_no_match: bool = True, _enable_search_after_missing_inventory: bool = True, _enable_high_level_planner: bool = True, + _enable_simbot_raw_text_match: bool = True, ) -> None: self.clarification_handler = SimBotClarificationHandler() self._features_client = features_client @@ -79,6 +80,7 @@ def __init__( _enable_search_after_no_match=_enable_search_after_no_match, _enable_search_after_missing_inventory=_enable_search_after_missing_inventory, _enable_high_level_planner=_enable_high_level_planner, + _enable_simbot_raw_text_match=_enable_simbot_raw_text_match, ) self._environment_error_pipeline = environment_error_pipeline diff --git a/src/emma_experience_hub/pipelines/simbot/request_processing.py b/src/emma_experience_hub/pipelines/simbot/request_processing.py index f651975b..952c0858 100644 --- a/src/emma_experience_hub/pipelines/simbot/request_processing.py +++ b/src/emma_experience_hub/pipelines/simbot/request_processing.py @@ -2,7 +2,6 @@ from loguru import logger -from emma_experience_hub.api.clients.simbot import SimBotSessionDbClient from emma_experience_hub.datamodels.simbot import ( SimBotActionStatus, SimBotRequest, @@ -14,7 +13,7 @@ class SimBotRequestProcessingPipeline: """Process the incoming requests and build the session data.""" - def __init__(self, session_db_client: SimBotSessionDbClient) -> None: + def __init__(self, session_db_client) -> None: self._session_db_client = session_db_client def run(self, request: SimBotRequest) -> SimBotSession: From e29a655b29496b6c8fa07f374def1392de3cb771 Mon Sep 17 00:00:00 2001 From: MalvinaNikandrou Date: Sat, 2 Dec 2023 11:11:03 +0000 Subject: [PATCH 4/8] Remove prior memory and added test for simbot controller --- .../api/clients/emma_policy.py | 1 - .../api/clients/simbot/cache.py | 23 +-------- .../api/clients/simbot/session_local_db.py | 21 +------- .../api/controllers/simbot/clients.py | 5 +- .../api/controllers/simbot/controller.py | 18 ------- src/emma_experience_hub/api/simbot.py | 7 --- .../commands/simbot/cli.py | 2 +- src/emma_experience_hub/commands/teach/cli.py | 4 +- .../common/settings/simbot.py | 4 +- src/emma_experience_hub/constants/simbot.py | 18 ------- .../datamodels/simbot/agent_memory.py | 49 ++----------------- .../instruction_handler.py | 2 +- .../functions/simbot/grab_from_history.py | 46 +---------------- .../pipelines/simbot/request_processing.py | 3 +- tests/controller/test_simbot_controller.py | 18 +++++++ tests/fixtures/paths.py | 37 ++++++++++++++ 16 files changed, 72 insertions(+), 186 deletions(-) create mode 100644 tests/controller/test_simbot_controller.py diff --git a/src/emma_experience_hub/api/clients/emma_policy.py b/src/emma_experience_hub/api/clients/emma_policy.py index ba419033..7e8aa9f1 100644 --- a/src/emma_experience_hub/api/clients/emma_policy.py +++ b/src/emma_experience_hub/api/clients/emma_policy.py @@ -39,7 +39,6 @@ def _make_request( emma_policy_request = EmmaPolicyRequest( environment_history=environment_state_history, dialogue_history=dialogue_history, - inventory=inventory_entity, force_stop_token=force_stop_token, ) logger.debug(f"Sending {emma_policy_request.num_images} images.") diff --git a/src/emma_experience_hub/api/clients/simbot/cache.py b/src/emma_experience_hub/api/clients/simbot/cache.py index 6397431f..a9922e87 100644 --- a/src/emma_experience_hub/api/clients/simbot/cache.py +++ b/src/emma_experience_hub/api/clients/simbot/cache.py @@ -14,7 +14,6 @@ from typing import Generic, Optional, TypeVar, Union import torch -from cloudpathlib import S3Client, S3Path from opentelemetry import trace from emma_experience_hub.api.clients.client import Client @@ -35,16 +34,12 @@ class SimBotCacheClient(Client, Generic[T]): def __init__( self, - bucket_name: str, local_cache_dir: Path, object_prefix: Optional[str] = None, ) -> None: - self.bucket = bucket_name self.prefix = object_prefix if object_prefix is not None else "" self._local_cache_dir = local_cache_dir - # self._s3 = S3Client(local_cache_dir=self._local_cache_dir) - def healthcheck(self) -> bool: """Healthcheck for the client.""" return self._local_cache_dir.exists() @@ -61,14 +56,6 @@ def load(self, session_id: str, prediction_request_id: str) -> T: """Load the data, converting from bytes to the object.""" raise NotImplementedError() - def upload_to_s3(self, session_id: str, prediction_request_id: str) -> None: - """Load the cached data to S3.""" - # Dont upload anything to s3 - return None - cached_data = self._load_bytes(session_id, prediction_request_id) - s3_path = self._create_s3_path(session_id, prediction_request_id) - s3_path.write_bytes(cached_data) - def _save_bytes(self, data: bytes, session_id: str, prediction_request_id: str) -> None: """Save the data.""" destination_path = self._create_local_path(session_id, prediction_request_id) @@ -86,14 +73,6 @@ def _create_local_path(self, session_id: str, prediction_request_id: str) -> Pat Path(f"{session_id}/{str(prediction_request_id)}.{self.suffix}") ) - def _create_s3_path(self, session_id: str, prediction_request_id: str) -> S3Path: - """Build the name of the object on S3.""" - object_name = "/".join( - [self.prefix, session_id, f"{str(prediction_request_id)}.{self.suffix}"] - ).lstrip("/") - - return self._s3.CloudPath(f"s3://{self.bucket}/{object_name}") - class SimBotPydanticCacheClient(PydanticClientMixin[PydanticT], SimBotCacheClient[PydanticT]): """Cache Pydantic models for SimBot. @@ -121,7 +100,7 @@ def load(self, session_id: str, prediction_request_id: str) -> PydanticT: class SimBotAuxiliaryMetadataClient(SimBotPydanticCacheClient[SimBotAuxiliaryMetadataPayload]): - """Cache auxiliary metadata on S3.""" + """Cache auxiliary metadata.""" model = SimBotAuxiliaryMetadataPayload suffix = "json" diff --git a/src/emma_experience_hub/api/clients/simbot/session_local_db.py b/src/emma_experience_hub/api/clients/simbot/session_local_db.py index a9ce95ec..72e931c6 100644 --- a/src/emma_experience_hub/api/clients/simbot/session_local_db.py +++ b/src/emma_experience_hub/api/clients/simbot/session_local_db.py @@ -1,10 +1,9 @@ +import sqlite3 from concurrent.futures import ThreadPoolExecutor from pathlib import Path -import sqlite3 from loguru import logger -from emma_experience_hub.api.clients.client import Client from emma_experience_hub.datamodels.simbot import SimBotSessionTurn @@ -19,7 +18,7 @@ def __init__(self, db_file: Path) -> None: self._db_file = db_file self.create_table() - def create_table(self) -> None: # noqa: WPS231 + def create_table(self) -> None: """Create table.""" if self._db_file.exists(): return @@ -161,19 +160,3 @@ def _get_all_session_turns(self, session_id: str) -> list[tuple[str, int, str]]: connection.close() return turns - - -# db = SimBotSQLLiteClient( -# Path("/home/gmp2000/simbot-offline-inference/storage/experience-hub/storage/local_sessions.db") -# ) - -# session_turns = db.get_all_session_turns("T1_39786908-bee2-48a5-bf33-6640de5b80e8") - -# for turn in session_turns: -# print(turn.prediction_request_id) -# if turn.speech: -# print(turn.speech.original_utterance.utterance, turn.speech.modified_utterance.utterance) -# print(turn.actions.interaction.raw_output) -# if turn.actions.interaction.status: -# print(turn.actions.interaction.status.success) -# breakpoint() diff --git a/src/emma_experience_hub/api/controllers/simbot/clients.py b/src/emma_experience_hub/api/controllers/simbot/clients.py index 1117db05..e2db85cc 100644 --- a/src/emma_experience_hub/api/controllers/simbot/clients.py +++ b/src/emma_experience_hub/api/controllers/simbot/clients.py @@ -1,11 +1,11 @@ import signal +from pathlib import Path from threading import Event from time import sleep from typing import Any from loguru import logger from pydantic import BaseModel -from pathlib import Path from emma_experience_hub.api.clients import ( Client, @@ -24,7 +24,6 @@ SimBotNLUIntentClient, SimBotPlaceholderVisionClient, SimBotQAIntentClient, - SimBotSessionDbClient, SimBotSQLLiteClient, ) from emma_experience_hub.common.settings import SimBotSettings @@ -52,7 +51,6 @@ def from_simbot_settings(cls, simbot_settings: SimBotSettings) -> "SimBotControl return cls( features=SimBotFeaturesClient( auxiliary_metadata_cache_client=SimBotAuxiliaryMetadataClient( - bucket_name=simbot_settings.simbot_cache_s3_bucket, local_cache_dir=simbot_settings.auxiliary_metadata_cache_dir, ), feature_extractor_client=FeatureExtractorClient( @@ -60,7 +58,6 @@ def from_simbot_settings(cls, simbot_settings: SimBotSettings) -> "SimBotControl timeout=simbot_settings.client_timeout, ), features_cache_client=SimBotExtractedFeaturesClient( - bucket_name=simbot_settings.simbot_cache_s3_bucket, local_cache_dir=simbot_settings.extracted_features_cache_dir, ), placeholder_vision_client=SimBotPlaceholderVisionClient( diff --git a/src/emma_experience_hub/api/controllers/simbot/controller.py b/src/emma_experience_hub/api/controllers/simbot/controller.py index cae45bf6..1f5143b3 100644 --- a/src/emma_experience_hub/api/controllers/simbot/controller.py +++ b/src/emma_experience_hub/api/controllers/simbot/controller.py @@ -1,5 +1,3 @@ -from contextlib import suppress - from loguru import logger from opentelemetry import trace @@ -223,22 +221,6 @@ def generate_language_action_if_needed(self, session: SimBotSession) -> SimBotSe logger.info(f"[ACTION] Dialog: `{session.current_turn.actions.dialog}`") return session - @tracer.start_as_current_span("Upload cache to S3") - def upload_cache_to_s3(self, session_id: str, prediction_request_id: str) -> None: - """Upload the cached data to S3. - - If the file does not exist, we don't care so just suppress that exception. - """ - # Dont upload anything to s3 - return None - with suppress(FileNotFoundError): - self.clients.features.auxiliary_metadata_cache_client.upload_to_s3( - session_id, prediction_request_id - ) - self.clients.features.features_cache_client.upload_to_s3( - session_id, prediction_request_id - ) - def _upload_session_turn_to_database(self, session: SimBotSession) -> None: """Upload the current session turn to the database.""" self.clients.session_db.add_session_turn(session.current_turn) diff --git a/src/emma_experience_hub/api/simbot.py b/src/emma_experience_hub/api/simbot.py index cfa19a28..2c433a06 100644 --- a/src/emma_experience_hub/api/simbot.py +++ b/src/emma_experience_hub/api/simbot.py @@ -1,6 +1,5 @@ from typing import Literal -import boto3 from fastapi import BackgroundTasks, FastAPI, Request, Response, status from loguru import logger from opentelemetry import trace @@ -72,12 +71,6 @@ async def handle_request_from_simbot_arena( # Handle the request simbot_response = state["controller"].handle_request_from_simbot_arena(simbot_request) - background_tasks.add_task( - state["controller"].upload_cache_to_s3, - simbot_response.session_id, - simbot_response.prediction_request_id, - ) - # Return response logger.info(f"Returning the response {simbot_response.json(by_alias=True)}") return simbot_response diff --git a/src/emma_experience_hub/commands/simbot/cli.py b/src/emma_experience_hub/commands/simbot/cli.py index ccbfb304..a7d3fb8a 100644 --- a/src/emma_experience_hub/commands/simbot/cli.py +++ b/src/emma_experience_hub/commands/simbot/cli.py @@ -233,7 +233,7 @@ def run_controller_api( ), auxiliary_metadata_cache_dir: Path = typer.Option( ..., - help="Local directory to store the cached auxiliary metadata before uploading to S3.", + help="Local directory to store the cached auxiliary metadata.", writable=True, exists=True, rich_help_panel="Directories", diff --git a/src/emma_experience_hub/commands/teach/cli.py b/src/emma_experience_hub/commands/teach/cli.py index 0522ebfe..87a305a4 100644 --- a/src/emma_experience_hub/commands/teach/cli.py +++ b/src/emma_experience_hub/commands/teach/cli.py @@ -41,10 +41,10 @@ @app.command() def prepare_everything( remote_perception_model_uri: str = typer.Option( - ..., envvar="REMOTE_PERCEPTION_MODEL_URI", help="URI for the Perception model file on S3." + ..., envvar="REMOTE_PERCEPTION_MODEL_URI", help="URI for the Perception model file." ), remote_policy_model_uri: str = typer.Option( - ..., envvar="REMOTE_POLICY_MODEL_URI", help="URI for the Policy model file on S3." + ..., envvar="REMOTE_POLICY_MODEL_URI", help="URI for the Policy model file." ), count: Optional[int] = typer.Option( None, diff --git a/src/emma_experience_hub/common/settings/simbot.py b/src/emma_experience_hub/common/settings/simbot.py index 1378d4d0..31ffaa07 100644 --- a/src/emma_experience_hub/common/settings/simbot.py +++ b/src/emma_experience_hub/common/settings/simbot.py @@ -102,9 +102,7 @@ class SimBotSettings(BaseSettings): extracted_features_cache_dir: DirectoryPath session_db_memory_table_name: str = "SIMBOT_MEMORY_TABLE" - session_local_db_file: str = ( - "/home/gmp2000/simbot-offline-inference/storage/experience-hub/storage/local_sessions.db" - ) + session_local_db_file: str = "storage/local_sessions.db" session_db_region: str = "us-east-1" feature_extractor_url: AnyHttpUrl = AnyHttpUrl(url=f"{scheme}://0.0.0.0:5500", scheme=scheme) diff --git a/src/emma_experience_hub/constants/simbot.py b/src/emma_experience_hub/constants/simbot.py index 4cbc5f99..7abce906 100644 --- a/src/emma_experience_hub/constants/simbot.py +++ b/src/emma_experience_hub/constants/simbot.py @@ -92,24 +92,6 @@ def get_feedback_rules() -> list[dict[str, str]]: return [{**rule, "id": str(idx)} for idx, rule in enumerate(raw_rule_reader, 2)] -@lru_cache(maxsize=1) -def get_prior_memory() -> dict[str, str]: - """Load prior memory of object location in rooms from file.""" - return {} - json_path = constants_absolute_path.joinpath("simbot", "prior_memory.json") - with open(json_path) as json_file: - return json.load(json_file)["fixed_locations"] - - -@lru_cache(maxsize=1) -def get_prior_memory_candidates() -> dict[str, list[str]]: - """Load prior memory of object location in candidate rooms from file.""" - return {} - json_path = constants_absolute_path.joinpath("simbot", "prior_memory.json") - with open(json_path) as json_file: - return json.load(json_file)["candidate_locations"] - - @lru_cache(maxsize=1) def get_search_budget() -> dict[str, SimBotRoomSearchBudget]: """Load the search_budget per room from file.""" diff --git a/src/emma_experience_hub/datamodels/simbot/agent_memory.py b/src/emma_experience_hub/datamodels/simbot/agent_memory.py index 6a21fbd7..f9718395 100644 --- a/src/emma_experience_hub/datamodels/simbot/agent_memory.py +++ b/src/emma_experience_hub/datamodels/simbot/agent_memory.py @@ -2,7 +2,6 @@ from pydantic import BaseModel, Field -from emma_experience_hub.constants.simbot import get_prior_memory, get_prior_memory_candidates from emma_experience_hub.datamodels import EmmaExtractedFeatures from emma_experience_hub.datamodels.common import ArenaLocation, Position, RotationQuaternion from emma_experience_hub.datamodels.simbot.actions import SimBotAction @@ -35,8 +34,6 @@ class SimBotObjectMemory(BaseModel): """Track all the observed objects and their closest viewpoints.""" memory: dict[str, SimBotRoomMemoryType] = {} - _prior_memory: dict[str, str] = get_prior_memory() - _prior_memory_candidates: dict[str, list[str]] = get_prior_memory_candidates() def update_from_action( # noqa: WPS231 self, @@ -132,50 +129,12 @@ def read_memory_entity_in_arena(self, object_label: str) -> list[tuple[str, Aren found_object_locations.append((room, memory_location)) return found_object_locations - def read_prior_memory_entity_in_arena(self, object_label: str) -> Optional[str]: - """Find object room based on prior memory.""" - return self._prior_memory.get(object_label.lower(), None) - def object_in_memory(self, object_label: str, current_room: str) -> bool: """Is the object in the current room or prior memory?""" - if self.read_memory_entity_in_room(room_name=current_room, object_label=object_label): - return True - object_label = object_label.lower() - - if object_label in self._prior_memory_candidates: - return True - - return self._prior_memory.get(object_label, None) is not None - - def get_other_interacted_room(self, object_label: str) -> Optional[str]: - """Get the room where the object was interacted with.""" - # Check if the object has multiple prior memory room candidates. - object_label = object_label.lower() - prior_room_candidates = self._prior_memory_candidates.get(object_label, None) - if prior_room_candidates is None: - return None - - # Find the most recent room where the object was interacted with. - object_memory_entry = [ - self.memory[room].get(object_label) if room in self.memory else None - for room in prior_room_candidates - ] - interaction_turns = [ - entry.interaction_turn if entry else -1 for entry in object_memory_entry - ] - most_recent_interaction_turn = max(interaction_turns) - if most_recent_interaction_turn < 0: - return None - return prior_room_candidates[interaction_turns.index(most_recent_interaction_turn)] - - def get_entity_room_candidate(self, object_label: str) -> Optional[list[str]]: - """Get all candidate rooms based on prior memory.""" - object_label = object_label.lower() - prior_memory_room = self._prior_memory.get(object_label, None) - if prior_memory_room is not None: - return [prior_memory_room] - - return self._prior_memory_candidates.get(object_label, None) + return ( + self.read_memory_entity_in_room(room_name=current_room, object_label=object_label) + is not None + ) def write_memory_entities_in_room( self, diff --git a/src/emma_experience_hub/functions/simbot/agent_intent_selection/instruction_handler.py b/src/emma_experience_hub/functions/simbot/agent_intent_selection/instruction_handler.py index 64561a7e..afa12495 100644 --- a/src/emma_experience_hub/functions/simbot/agent_intent_selection/instruction_handler.py +++ b/src/emma_experience_hub/functions/simbot/agent_intent_selection/instruction_handler.py @@ -169,7 +169,7 @@ def _process_utterance_with_nlu(self, session: SimBotSession) -> SimBotAgentInte session.update_agent_memory(extracted_features) logger.debug(f"Extracted intent: {intent}") - if not intent.type.triggers_question_to_user: + if not intent.type.triggers_question_to_user and session.current_turn.speech is not None: new_utterance = session.current_turn.speech.utterance.split("<>")[0].strip() session.current_turn.speech = SimBotUserSpeech( modified_utterance=SimBotUtterance( diff --git a/src/emma_experience_hub/functions/simbot/grab_from_history.py b/src/emma_experience_hub/functions/simbot/grab_from_history.py index 376a500d..ac53481e 100644 --- a/src/emma_experience_hub/functions/simbot/grab_from_history.py +++ b/src/emma_experience_hub/functions/simbot/grab_from_history.py @@ -14,7 +14,7 @@ class GrabFromHistory: """Grab from History class.""" - def __call__( # noqa: WPS212 + def __call__( self, session: SimBotSession, search_planner: SearchPlanner, @@ -30,17 +30,6 @@ def __call__( # noqa: WPS212 current_room = session.current_turn.environment.current_room - # Have we interacted with the object in another room? - previous_interaction_room = session.current_state.memory.get_other_interacted_room( - object_label=searchable_object - ) - if previous_interaction_room is not None and previous_interaction_room != current_room: - return self._goto_room_before_search( - session=session, - searchable_object=searchable_object, - room=previous_interaction_room, - ) - # Have we seen the object in the current room? gfh_location = session.current_state.memory.read_memory_entity_in_room( room_name=current_room, object_label=searchable_object @@ -52,38 +41,7 @@ def __call__( # noqa: WPS212 gfh_location = None return search_planner.run(session, gfh_location=gfh_location) - # Is it an object we should search in different rooms? - # 1. Get the candidate rooms - # 2. If there are no candidate rooms or the current room is in the candidates - search in - # the current room - # 3. If there are multiple candidate rooms, ask for confirmation - # 4. If there is only one candidate room, go to that room - - gfh_prior_memory_room_candidates = session.current_state.memory.get_entity_room_candidate( - object_label=searchable_object - ) - if gfh_prior_memory_room_candidates is None: - logger.debug( - f"Could not retrieve {searchable_object} from memory {session.current_state.memory}" - ) - return search_planner.run(session) - # If prior knowlwedge says that the object is in the current room start searching - if current_room in gfh_prior_memory_room_candidates: - return search_planner.run(session) - - gfh_prior_memory_room = gfh_prior_memory_room_candidates[0] - if len(gfh_prior_memory_room_candidates) > 1: - return self._ask_for_confirmation_before_search( - session=session, searchable_object=searchable_object, room=gfh_prior_memory_room - ) - - # Otherwise, just go to the room - logger.debug( - f"Found object {searchable_object} in prior memory room {gfh_prior_memory_room}" - ) - return self._goto_room_before_search( - session=session, searchable_object=searchable_object, room=gfh_prior_memory_room - ) + return search_planner.run(session) def _goto_room_before_search( self, session: SimBotSession, room: str, searchable_object: str diff --git a/src/emma_experience_hub/pipelines/simbot/request_processing.py b/src/emma_experience_hub/pipelines/simbot/request_processing.py index 952c0858..19a4e93f 100644 --- a/src/emma_experience_hub/pipelines/simbot/request_processing.py +++ b/src/emma_experience_hub/pipelines/simbot/request_processing.py @@ -2,6 +2,7 @@ from loguru import logger +from emma_experience_hub.api.clients.simbot import SimBotSQLLiteClient from emma_experience_hub.datamodels.simbot import ( SimBotActionStatus, SimBotRequest, @@ -13,7 +14,7 @@ class SimBotRequestProcessingPipeline: """Process the incoming requests and build the session data.""" - def __init__(self, session_db_client) -> None: + def __init__(self, session_db_client: SimBotSQLLiteClient) -> None: self._session_db_client = session_db_client def run(self, request: SimBotRequest) -> SimBotSession: diff --git a/tests/controller/test_simbot_controller.py b/tests/controller/test_simbot_controller.py new file mode 100644 index 00000000..d23db6be --- /dev/null +++ b/tests/controller/test_simbot_controller.py @@ -0,0 +1,18 @@ +from typing import Any + +from pytest_cases import parametrize_with_cases + +from emma_experience_hub.api.controllers import SimBotController +from emma_experience_hub.common.settings import SimBotSettings +from emma_experience_hub.datamodels.simbot import SimBotRequest +from tests.fixtures.simbot_api_requests import SimBotRequestCases + + +@parametrize_with_cases("request_body", cases=SimBotRequestCases) +def test_simbot_api( + request_body: dict[str, Any], + simbot_settings: SimBotSettings, +) -> None: + simbot_request = SimBotRequest.parse_obj(request_body) + controller = SimBotController.from_simbot_settings(simbot_settings) + controller.handle_request_from_simbot_arena(simbot_request) diff --git a/tests/fixtures/paths.py b/tests/fixtures/paths.py index 998e35d9..b358cebd 100644 --- a/tests/fixtures/paths.py +++ b/tests/fixtures/paths.py @@ -1,8 +1,14 @@ +import os from pathlib import Path from typing import Any from pytest_cases import fixture +from emma_experience_hub.common.settings import SimBotSettings + + +os.environ["profile"] = "offline" + @fixture(scope="session") def storage_root() -> Path: @@ -20,3 +26,34 @@ def fixtures_root(storage_root: Path) -> Path: def cache_root(request: Any) -> Path: """Root of cached dir for the tests.""" return Path(request.config.cache._cachedir) + + +@fixture(scope="session") +def auxiliary_metadata_dir(fixtures_root: Path) -> Path: + """Path to the auxilary game metadata.""" + return fixtures_root.joinpath("simbot/game_metadata/") + + +@fixture(scope="session") +def auxiliary_metadata_cache_dir(fixtures_root: Path) -> Path: + """Path to the auxilary the metadata cache.""" + return fixtures_root.joinpath("simbot/metadata_cache_dir/") + + +@fixture(scope="session") +def features_cache_dir(fixtures_root: Path) -> Path: + """Path to the auxilary features cache.""" + return fixtures_root.joinpath("simbot/features/") + + +@fixture(scope="session") +def simbot_settings( + auxiliary_metadata_dir: Path, auxiliary_metadata_cache_dir: Path, features_cache_dir: Path +) -> SimBotSettings: + """Settings.""" + simbot_settings = SimBotSettings( + auxiliary_metadata_dir=auxiliary_metadata_dir, + auxiliary_metadata_cache_dir=auxiliary_metadata_cache_dir, + extracted_features_cache_dir=features_cache_dir, + ) + return simbot_settings From b897ee69f68b1067c97ca75dcd75b40e04caf97f Mon Sep 17 00:00:00 2001 From: MalvinaNikandrou Date: Sat, 2 Dec 2023 11:14:46 +0000 Subject: [PATCH 5/8] Remove file and flag for prior memory --- .../common/settings/simbot.py | 2 -- .../constants/simbot/prior_memory.json | 31 ------------------- 2 files changed, 33 deletions(-) delete mode 100644 src/emma_experience_hub/constants/simbot/prior_memory.json diff --git a/src/emma_experience_hub/common/settings/simbot.py b/src/emma_experience_hub/common/settings/simbot.py index 31ffaa07..f0693c20 100644 --- a/src/emma_experience_hub/common/settings/simbot.py +++ b/src/emma_experience_hub/common/settings/simbot.py @@ -31,7 +31,6 @@ class SimBotFeatureFlags(BaseModel): enable_coreference_resolution: bool = True enable_confirmation_questions: bool = True enable_grab_from_history: bool = True - enable_prior_memory: bool = True enable_incomplete_utterances_intent: bool = True enable_object_related_questions_from_user: bool = True enable_out_of_domain_detector: bool = True @@ -60,7 +59,6 @@ def set_offline_evaluation_flags( values["enable_clarification_questions"] = False values["enable_compound_splitting"] = False values["enable_coreference_resolution"] = False - values["enable_prior_memory"] = False values["enable_confirmation_questions"] = False values["enable_incomplete_utterances_intent"] = False values["enable_object_related_questions_from_user"] = False diff --git a/src/emma_experience_hub/constants/simbot/prior_memory.json b/src/emma_experience_hub/constants/simbot/prior_memory.json deleted file mode 100644 index cb405860..00000000 --- a/src/emma_experience_hub/constants/simbot/prior_memory.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "fixed_locations": { - "coffee unmaker": "BreakRoom", - "color changer": "Lab2", - "emotion tester": "Lab1", - "fridge": "BreakRoom", - "freezer": "BreakRoom", - "freeze ray": "Lab1", - "freeze ray monitor": "Lab1", - "freeze ray shelf": "Lab1", - "generator": "Lab2", - "laser": "Lab1", - "laser monitor": "Lab1", - "laser shelf": "Lab1", - "microwave": "BreakRoom", - "robot arm": "Lab1", - "time machine": "BreakRoom", - "printer": "Lab1", - "gravity pad": "Lab2", - "gravity monitor": "Lab2", - "portal generator monitor": "Lab2", - "everything's a carrot machine": "Lab2" - }, - "candidate_locations": { - "embiggenator": ["Lab2", "Warehouse"], - "embiggenator monitor": ["Lab2", "Warehouse"], - "sink": ["BreakRoom", "Warehouse"], - "vending machine": ["BreakRoom", "Reception"], - "coffee maker": ["BreakRoom", "Reception"] - } -} From 08cf9bf987c0163daec2be7ae1d9d1ddd0c42413 Mon Sep 17 00:00:00 2001 From: MalvinaNikandrou Date: Sat, 2 Dec 2023 11:49:46 +0000 Subject: [PATCH 6/8] Remove any tracing --- .../api/clients/feature_extractor.py | 6 --- .../api/clients/simbot/action_prediction.py | 33 +++++------- .../api/clients/simbot/cache.py | 5 -- .../api/clients/simbot/features.py | 40 +++++--------- .../api/clients/simbot/hacks.py | 12 ++--- .../api/clients/simbot/nlu_intent.py | 18 +++---- .../api/clients/simbot/placeholder_vision.py | 4 -- .../api/clients/simbot/qa_intent.py | 5 +- .../api/controllers/simbot/controller.py | 53 +++++++------------ src/emma_experience_hub/api/simbot.py | 22 +++----- .../functions/simbot/special_tokens.py | 7 +-- .../simbot/feedback_from_session_context.py | 6 --- .../simbot/agent_action_generation.py | 45 ++++++---------- .../simbot/agent_language_generation.py | 10 +--- .../pipelines/simbot/anticipator.py | 4 -- .../pipelines/simbot/find_object.py | 13 +---- .../simbot/user_intent_extraction.py | 5 -- .../simbot/user_utterance_verification.py | 9 ---- 18 files changed, 86 insertions(+), 211 deletions(-) diff --git a/src/emma_experience_hub/api/clients/feature_extractor.py b/src/emma_experience_hub/api/clients/feature_extractor.py index 4b867132..5affeae6 100644 --- a/src/emma_experience_hub/api/clients/feature_extractor.py +++ b/src/emma_experience_hub/api/clients/feature_extractor.py @@ -6,7 +6,6 @@ import torch from loguru import logger from numpy.typing import ArrayLike -from opentelemetry import trace from PIL import Image from emma_common.datamodels import TorchDataMixin @@ -14,9 +13,6 @@ from emma_experience_hub.datamodels import EmmaExtractedFeatures -tracer = trace.get_tracer(__name__) - - class FeatureExtractorClient(Client): """API Client for sending requests to the feature extractor server.""" @@ -48,7 +44,6 @@ def change_device(self, device: torch.device) -> None: if response.status_code == httpx.codes.OK: logger.info(f"Feature extractor model moved to device `{device}`") - @tracer.start_as_current_span("Extract features from single image") def process_single_image(self, image: Union[Image.Image, ArrayLike]) -> EmmaExtractedFeatures: """Submit a request to the feature extraction server for a single image.""" image_bytes = self._convert_single_image_to_bytes(image) @@ -70,7 +65,6 @@ def process_single_image(self, image: Union[Image.Image, ArrayLike]) -> EmmaExtr return feature_response - @tracer.start_as_current_span("Extract features from image batch") def process_many_images( self, images: Union[list[Image.Image], list[ArrayLike]] ) -> list[EmmaExtractedFeatures]: diff --git a/src/emma_experience_hub/api/clients/simbot/action_prediction.py b/src/emma_experience_hub/api/clients/simbot/action_prediction.py index 3ddd9eef..dce5627b 100644 --- a/src/emma_experience_hub/api/clients/simbot/action_prediction.py +++ b/src/emma_experience_hub/api/clients/simbot/action_prediction.py @@ -1,14 +1,9 @@ from typing import Optional -from opentelemetry import trace - from emma_common.datamodels import DialogueUtterance, EnvironmentStateTurn from emma_experience_hub.api.clients.emma_policy import EmmaPolicyClient -tracer = trace.get_tracer(__name__) - - class SimbotActionPredictionClient(EmmaPolicyClient): """Action prediction client which interfaces with the Policy model.""" @@ -20,14 +15,13 @@ def generate( inventory_entity: Optional[str] = None, ) -> str: """Generate a response from the features and provided language.""" - with tracer.start_as_current_span("Generate action"): - return self._make_request( - f"{self._endpoint}/generate", - environment_state_history, - dialogue_history, - force_stop_token=force_stop_token, - inventory_entity=inventory_entity, - ) + return self._make_request( + f"{self._endpoint}/generate", + environment_state_history, + dialogue_history, + force_stop_token=force_stop_token, + inventory_entity=inventory_entity, + ) def find_object_in_scene( self, @@ -36,10 +30,9 @@ def find_object_in_scene( inventory_entity: Optional[str] = None, ) -> list[str]: """Generate a response from the features and provided language.""" - with tracer.start_as_current_span("Find object in scene"): - return self._make_request( - f"{self._endpoint}/generate_find", - environment_state_history, - dialogue_history, - inventory_entity=inventory_entity, - ) + return self._make_request( + f"{self._endpoint}/generate_find", + environment_state_history, + dialogue_history, + inventory_entity=inventory_entity, + ) diff --git a/src/emma_experience_hub/api/clients/simbot/cache.py b/src/emma_experience_hub/api/clients/simbot/cache.py index a9922e87..c81aaf3b 100644 --- a/src/emma_experience_hub/api/clients/simbot/cache.py +++ b/src/emma_experience_hub/api/clients/simbot/cache.py @@ -14,7 +14,6 @@ from typing import Generic, Optional, TypeVar, Union import torch -from opentelemetry import trace from emma_experience_hub.api.clients.client import Client from emma_experience_hub.api.clients.pydantic import PydanticClientMixin, PydanticT @@ -24,8 +23,6 @@ T = TypeVar("T") -tracer = trace.get_tracer(__name__) - class SimBotCacheClient(Client, Generic[T]): """Cache client for SimBot data.""" @@ -111,7 +108,6 @@ class SimBotExtractedFeaturesClient(SimBotCacheClient[list[EmmaExtractedFeatures suffix = "pt" - @tracer.start_as_current_span("Save extracted features") def save( self, data: list[EmmaExtractedFeatures], @@ -132,7 +128,6 @@ def save( # Write data self._save_bytes(data_buffer.getvalue(), session_id, prediction_request_id) - @tracer.start_as_current_span("Load extracted features from file") def load(self, session_id: str, prediction_request_id: str) -> list[EmmaExtractedFeatures]: """Load the extracted features from a single file.""" # Load the raw data using torch. diff --git a/src/emma_experience_hub/api/clients/simbot/features.py b/src/emma_experience_hub/api/clients/simbot/features.py index f426506b..85d2e470 100644 --- a/src/emma_experience_hub/api/clients/simbot/features.py +++ b/src/emma_experience_hub/api/clients/simbot/features.py @@ -1,5 +1,4 @@ from loguru import logger -from opentelemetry import trace from emma_experience_hub.api.clients.client import Client from emma_experience_hub.api.clients.feature_extractor import FeatureExtractorClient @@ -13,9 +12,6 @@ from emma_experience_hub.datamodels.simbot.payloads import SimBotAuxiliaryMetadataPayload -tracer = trace.get_tracer(__name__) - - class SimBotFeaturesClient(Client): """Extract features and cache them.""" @@ -41,12 +37,10 @@ def healthcheck(self) -> bool: ] ) - @tracer.start_as_current_span("Check if features exist") def check_exist(self, turn: SimBotSessionTurn) -> bool: """Check whether features already exist for the given turn.""" return self.features_cache_client.check_exist(turn.session_id, turn.prediction_request_id) - @tracer.start_as_current_span("Get features") def get_features(self, turn: SimBotSessionTurn) -> list[EmmaExtractedFeatures]: """Get the features for the given turn.""" logger.debug("Getting features for turn...") @@ -65,39 +59,33 @@ def get_features(self, turn: SimBotSessionTurn) -> list[EmmaExtractedFeatures]: return features - @tracer.start_as_current_span("Get auxiliary metadata") def get_auxiliary_metadata(self, turn: SimBotSessionTurn) -> SimBotAuxiliaryMetadataPayload: """Cache the auxiliary metadata for the given turn.""" # Check whether the auxiliary metadata exists within the cache - with tracer.start_as_current_span("Check auxiliary metadata cache"): - auxiliary_metadata_exists = self.auxiliary_metadata_cache_client.check_exist( - turn.session_id, turn.prediction_request_id - ) + auxiliary_metadata_exists = self.auxiliary_metadata_cache_client.check_exist( + turn.session_id, turn.prediction_request_id + ) # Load the auxiliary metadata from the cache or the EFS URI if auxiliary_metadata_exists: - with tracer.start_as_current_span("Load auxiliary metadata from cache"): - auxiliary_metadata = self.auxiliary_metadata_cache_client.load( - turn.session_id, turn.prediction_request_id - ) + auxiliary_metadata = self.auxiliary_metadata_cache_client.load( + turn.session_id, turn.prediction_request_id + ) else: - with tracer.start_as_current_span("Load auxiliary metadata from EFS"): - auxiliary_metadata = SimBotAuxiliaryMetadataPayload.from_efs_uri( - uri=turn.auxiliary_metadata_uri - ) + auxiliary_metadata = SimBotAuxiliaryMetadataPayload.from_efs_uri( + uri=turn.auxiliary_metadata_uri + ) # If it has not been cached, upload it to the cache if not auxiliary_metadata_exists: - with tracer.start_as_current_span("Save auxiliary metadata to cache"): - self.auxiliary_metadata_cache_client.save( - auxiliary_metadata, - turn.session_id, - turn.prediction_request_id, - ) + self.auxiliary_metadata_cache_client.save( + auxiliary_metadata, + turn.session_id, + turn.prediction_request_id, + ) return auxiliary_metadata - @tracer.start_as_current_span("Get mask for embiggenator") def get_mask_for_embiggenator(self, turn: SimBotSessionTurn) -> list[list[int]]: """Try to replace the object mask with the placeholder model output if needed.""" image = next(iter(self.get_auxiliary_metadata(turn).images)) diff --git a/src/emma_experience_hub/api/clients/simbot/hacks.py b/src/emma_experience_hub/api/clients/simbot/hacks.py index 9b29d9d2..ae3bd45b 100644 --- a/src/emma_experience_hub/api/clients/simbot/hacks.py +++ b/src/emma_experience_hub/api/clients/simbot/hacks.py @@ -4,15 +4,12 @@ import httpx from loguru import logger from methodtools import lru_cache -from opentelemetry import trace from pydantic import BaseModel from emma_experience_hub.api.clients import Client from emma_experience_hub.datamodels.simbot.actions import SimBotAction -tracer = trace.get_tracer(__name__) - LRU_CACHE_MAX_SIZE = 64 @@ -41,16 +38,14 @@ def healthcheck(self) -> bool: def get_low_level_prediction_from_raw_text(self, utterance: str) -> Optional[str]: """Generate a response from the provided language.""" - with tracer.start_as_current_span("Match text to template"): - response = self._get_low_level_prediction_from_raw_text(utterance) + response = self._get_low_level_prediction_from_raw_text(utterance) logger.debug(f"Cache info: {self._get_low_level_prediction_from_raw_text.cache_info()}") return response def get_room_prediction_from_raw_text(self, utterance: str) -> Optional[SimBotHacksRoom]: """Generate a room prediction from the provided language.""" - with tracer.start_as_current_span("Get room from raw text"): - response = self._get_room_prediction_from_raw_text(utterance) + response = self._get_room_prediction_from_raw_text(utterance) logger.debug(f"Cache info: {self._get_room_prediction_from_raw_text.cache_info()}") return response @@ -62,8 +57,7 @@ def get_anticipator_prediction_from_action( entity_labels: Optional[list[str]] = None, ) -> Optional[SimBotHacksAnticipator]: """Generate possible plan of instructions from the given action.""" - with tracer.start_as_current_span("Get anticipator plan"): - response = self._get_anticipated_instructions(action, inventory_entity, entity_labels) + response = self._get_anticipated_instructions(action, inventory_entity, entity_labels) logger.debug(f"Cache info: {self._get_room_prediction_from_raw_text.cache_info()}") return response diff --git a/src/emma_experience_hub/api/clients/simbot/nlu_intent.py b/src/emma_experience_hub/api/clients/simbot/nlu_intent.py index d578ef16..5792e415 100644 --- a/src/emma_experience_hub/api/clients/simbot/nlu_intent.py +++ b/src/emma_experience_hub/api/clients/simbot/nlu_intent.py @@ -1,14 +1,9 @@ from typing import Optional -from opentelemetry import trace - from emma_common.datamodels import DialogueUtterance, EnvironmentStateTurn from emma_experience_hub.api.clients.emma_policy import EmmaPolicyClient -tracer = trace.get_tracer(__name__) - - class SimBotNLUIntentClient(EmmaPolicyClient): """API Client for SimBot NLU.""" @@ -19,10 +14,9 @@ def generate( inventory_entity: Optional[str] = None, ) -> str: """Generate a response from the features and provided language.""" - with tracer.start_as_current_span("Generate NLU intent"): - return self._make_request( - f"{self._endpoint}/generate", - environment_state_history, - dialogue_history, - inventory_entity, - ) + return self._make_request( + f"{self._endpoint}/generate", + environment_state_history, + dialogue_history, + inventory_entity, + ) diff --git a/src/emma_experience_hub/api/clients/simbot/placeholder_vision.py b/src/emma_experience_hub/api/clients/simbot/placeholder_vision.py index 65c64f8b..3e88fd67 100644 --- a/src/emma_experience_hub/api/clients/simbot/placeholder_vision.py +++ b/src/emma_experience_hub/api/clients/simbot/placeholder_vision.py @@ -1,14 +1,10 @@ import httpx from loguru import logger -from opentelemetry import trace from PIL import Image from emma_experience_hub.api.clients.feature_extractor import FeatureExtractorClient -tracer = trace.get_tracer(__name__) - - class SimBotPlaceholderVisionClient(FeatureExtractorClient): """Run the placeholder vision client.""" diff --git a/src/emma_experience_hub/api/clients/simbot/qa_intent.py b/src/emma_experience_hub/api/clients/simbot/qa_intent.py index 65d54ac1..417e1c3e 100644 --- a/src/emma_experience_hub/api/clients/simbot/qa_intent.py +++ b/src/emma_experience_hub/api/clients/simbot/qa_intent.py @@ -3,12 +3,10 @@ import httpx from loguru import logger from methodtools import lru_cache -from opentelemetry import trace from emma_experience_hub.api.clients.client import Client -tracer = trace.get_tracer(__name__) LRU_CACHE_MAX_SIZE = 64 @@ -21,8 +19,7 @@ def healthcheck(self) -> bool: def process_utterance(self, utterance: str) -> Optional[dict[str, Any]]: """Given an utterance extract intents and entities.""" - with tracer.start_as_current_span("Match text to template"): - response = self._process_utterance(utterance) + response = self._process_utterance(utterance) logger.debug(f"Cache info: {self._process_utterance.cache_info()}") return response diff --git a/src/emma_experience_hub/api/controllers/simbot/controller.py b/src/emma_experience_hub/api/controllers/simbot/controller.py index 1f5143b3..e442119b 100644 --- a/src/emma_experience_hub/api/controllers/simbot/controller.py +++ b/src/emma_experience_hub/api/controllers/simbot/controller.py @@ -1,5 +1,4 @@ from loguru import logger -from opentelemetry import trace from emma_common.datamodels import SpeakerRole from emma_experience_hub.api.controllers.simbot.clients import SimBotControllerClients @@ -14,9 +13,6 @@ ) -tracer = trace.get_tracer(__name__) - - class SimBotController: """Inference pipeline for the live SimBot Challenge.""" @@ -73,24 +69,21 @@ def split_utterance_if_needed(self, session: SimBotSession) -> SimBotSession: return session logger.debug("Trying to split the instruction...") - with tracer.start_as_current_span("Split compound utterance"): - session = self.pipelines.compound_splitter.run(session) - return self.pipelines.compound_splitter.run_coreference_resolution(session) + session = self.pipelines.compound_splitter.run(session) + return self.pipelines.compound_splitter.run_coreference_resolution(session) def load_session_from_request(self, simbot_request: SimBotRequest) -> SimBotSession: """Load the entire session from the given request.""" logger.debug("Running request processing") - with tracer.start_as_current_span("Load session from request"): - session = self.pipelines.request_processing.run(simbot_request) + session = self.pipelines.request_processing.run(simbot_request) # Verify user utterance is valid if self.settings.is_not_offline_evaluation and simbot_request.speech_recognition: - with tracer.start_as_current_span("Verify incoming utterance"): - user_intent = self.pipelines.user_utterance_verifier.run( - simbot_request.speech_recognition - ) - session.current_turn.intent.user = user_intent + user_intent = self.pipelines.user_utterance_verifier.run( + simbot_request.speech_recognition + ) + session.current_turn.intent.user = user_intent # Cache the auxiliary metadata for the turn self.clients.features.get_auxiliary_metadata(session.current_turn) @@ -117,8 +110,7 @@ def extract_intent_from_user_utterance(self, session: SimBotSession) -> SimBotSe # If the utterance is valid, extract the intent from it. logger.debug("Extracting intent from user input...") - with tracer.start_as_current_span("Extract intent from user utterance"): - user_intent = self.pipelines.user_intent_extractor.run(session) + user_intent = self.pipelines.user_intent_extractor.run(session) logger.info(f"[INTENT] User: `{user_intent}`") user_intent_is_not_act = user_intent != SimBotIntentType.act @@ -132,10 +124,9 @@ def extract_intent_from_environment_feedback(self, session: SimBotSession) -> Si """Determine what feedback from the environment tells us to do, if anything.""" logger.debug("Extracting intent from the environment...") - with tracer.start_as_current_span("Extract intent from environment feedback"): - session.current_turn.intent.environment = ( - self.pipelines.environment_intent_extractor.run(session) - ) + session.current_turn.intent.environment = self.pipelines.environment_intent_extractor.run( + session + ) logger.info(f"[INTENT] Environment: `{session.current_turn.intent.environment}`") return session @@ -174,11 +165,10 @@ def get_utterance_from_queue_if_needed(self, session: SimBotSession) -> SimBotSe def decide_what_the_agent_should_do(self, session: SimBotSession) -> SimBotSession: """Decide what the agent should do next.""" logger.debug("Selecting agent intent...") - with tracer.start_as_current_span("Determine agent intent"): - agent_intents = self.pipelines.agent_intent_selector.run(session) + agent_intents = self.pipelines.agent_intent_selector.run(session) - session.current_turn.intent.physical_interaction = agent_intents[0] - session.current_turn.intent.verbal_interaction = agent_intents[1] + session.current_turn.intent.physical_interaction = agent_intents[0] + session.current_turn.intent.verbal_interaction = agent_intents[1] logger.info(f"[INTENT] Interaction: `{session.current_turn.intent.physical_interaction}`") logger.info( @@ -198,13 +188,11 @@ def generate_interaction_action_if_needed(self, session: SimBotSession) -> SimBo # Therefore, there is no interaction for the current turn, try to fill it if session.current_turn.actions.interaction is None: logger.debug("Generating interaction action...") - with tracer.start_as_current_span("Generate interaction action"): - session.current_turn.actions.interaction = ( - self.pipelines.agent_action_generator.run(session) - ) + session.current_turn.actions.interaction = self.pipelines.agent_action_generator.run( + session + ) - with tracer.start_as_current_span("Anticipate instructions"): - self.pipelines.anticipator.run(session) + self.pipelines.anticipator.run(session) logger.info(f"[ACTION] Interaction: `{session.current_turn.actions.interaction}`") return session @@ -213,10 +201,7 @@ def generate_language_action_if_needed(self, session: SimBotSession) -> SimBotSe """Generate a language action if needed.""" logger.debug("Generating utterance for the turn (if needed)...") - with tracer.start_as_current_span("Generate utterance"): - session.current_turn.actions.dialog = self.pipelines.agent_language_generator.run( - session - ) + session.current_turn.actions.dialog = self.pipelines.agent_language_generator.run(session) logger.info(f"[ACTION] Dialog: `{session.current_turn.actions.dialog}`") return session diff --git a/src/emma_experience_hub/api/simbot.py b/src/emma_experience_hub/api/simbot.py index 2c433a06..ea7e436e 100644 --- a/src/emma_experience_hub/api/simbot.py +++ b/src/emma_experience_hub/api/simbot.py @@ -2,7 +2,6 @@ from fastapi import BackgroundTasks, FastAPI, Request, Response, status from loguru import logger -from opentelemetry import trace from emma_experience_hub.api.controllers import SimBotController from emma_experience_hub.api.observability import create_logger_context @@ -10,9 +9,6 @@ from emma_experience_hub.datamodels.simbot import SimBotRequest, SimBotResponse -tracer = trace.get_tracer(__name__) - - app = FastAPI(title="SimBot Challenge Inference") @@ -55,21 +51,19 @@ async def handle_request_from_simbot_arena( raw_request = await request.json() # Parse the request from the server - with tracer.start_as_current_span("Parse raw request"): - try: - simbot_request = SimBotRequest.parse_obj(raw_request) - except Exception as request_err: - logger.exception("Unable to parse request") - response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR - raise request_err + try: + simbot_request = SimBotRequest.parse_obj(raw_request) + except Exception as request_err: + logger.exception("Unable to parse request") + response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR + raise request_err with create_logger_context(simbot_request): # Log the incoming request logger.info(f"Received request: {raw_request}") - with tracer.start_as_current_span("Handle request"): - # Handle the request - simbot_response = state["controller"].handle_request_from_simbot_arena(simbot_request) + # Handle the request + simbot_response = state["controller"].handle_request_from_simbot_arena(simbot_request) # Return response logger.info(f"Returning the response {simbot_response.json(by_alias=True)}") diff --git a/src/emma_experience_hub/functions/simbot/special_tokens.py b/src/emma_experience_hub/functions/simbot/special_tokens.py index 85af5e29..8cf03ecd 100644 --- a/src/emma_experience_hub/functions/simbot/special_tokens.py +++ b/src/emma_experience_hub/functions/simbot/special_tokens.py @@ -2,7 +2,6 @@ import torch from loguru import logger -from opentelemetry import trace from pydantic import BaseModel, Field from emma_experience_hub.datamodels import EmmaExtractedFeatures @@ -10,9 +9,6 @@ from emma_experience_hub.functions.simbot.masks import compress_segmentation_mask -tracer = trace.get_tracer(__name__) - - class SimBotSceneObjectTokens(BaseModel): """Token IDs when we find an object in the scene.""" @@ -69,8 +65,7 @@ def get_mask_from_special_tokens( # Populate the bbox region in the mask mask[int(y_min) : int(y_max) + 1, int(x_min) : int(x_max) + 1] = 1 # noqa: WPS221 - with tracer.start_as_current_span("Compress segmentation mask"): - compressed_mask = compress_segmentation_mask(mask) + compressed_mask = compress_segmentation_mask(mask) if return_coords: return compressed_mask, (x_min, y_min, x_max, y_max) diff --git a/src/emma_experience_hub/parsers/simbot/feedback_from_session_context.py b/src/emma_experience_hub/parsers/simbot/feedback_from_session_context.py index 4e775bd4..ab7e7df0 100644 --- a/src/emma_experience_hub/parsers/simbot/feedback_from_session_context.py +++ b/src/emma_experience_hub/parsers/simbot/feedback_from_session_context.py @@ -5,7 +5,6 @@ from typing import Optional from loguru import logger -from opentelemetry import trace from rule_engine import Rule from emma_experience_hub.constants.simbot import get_feedback_rules @@ -13,9 +12,6 @@ from emma_experience_hub.parsers.parser import Parser -tracer = trace.get_tracer(__name__) - - class SimBotFeedbackFromSessionStateParser(Parser[SimBotFeedbackState, SimBotFeedbackRule]): """Get the best response for the current session feedback state.""" @@ -50,7 +46,6 @@ def from_rules_csv(cls) -> "SimBotFeedbackFromSessionStateParser": logger.debug(f"Loaded {len(rules)} feedback rules.") return cls(rules=rules) - @tracer.start_as_current_span("Get compatible rules") def _get_all_compatible_rules(self, state: SimBotFeedbackState) -> list[SimBotFeedbackRule]: """Get all of the rules which are compatible with the current state.""" # Store all the compatible rules with the given query @@ -91,7 +86,6 @@ def _filter_rules(self, state: SimBotFeedbackState) -> Iterator[SimBotFeedbackRu filtered_rules = (rule for rule in self._rules if not rule.is_lightweight_dialog) return filtered_rules - @tracer.start_as_current_span("Select feedback rule") def _select_feedback_rule( self, candidates: list[SimBotFeedbackRule], used_rule_ids: list[int] ) -> SimBotFeedbackRule: diff --git a/src/emma_experience_hub/pipelines/simbot/agent_action_generation.py b/src/emma_experience_hub/pipelines/simbot/agent_action_generation.py index d0e8638f..ea66effe 100644 --- a/src/emma_experience_hub/pipelines/simbot/agent_action_generation.py +++ b/src/emma_experience_hub/pipelines/simbot/agent_action_generation.py @@ -1,7 +1,6 @@ from typing import Callable, Optional from loguru import logger -from opentelemetry import trace from emma_experience_hub.api.clients.simbot import ( SimbotActionPredictionClient, @@ -24,9 +23,6 @@ from emma_experience_hub.pipelines.simbot.find_object import SimBotFindObjectPipeline -tracer = trace.get_tracer(__name__) - - class SimBotAgentActionGenerationPipeline: """Generate an environment interaction for the agent to perform on the environment. @@ -69,12 +65,11 @@ def run(self, session: SimBotSession) -> Optional[SimBotAction]: ) return None - with tracer.start_as_current_span("Generate action"): - try: - return action_intent_handler(session) - except Exception: - logger.error("Failed to convert the agent intent to executable form.") - return None + try: + return action_intent_handler(session) + except Exception: + logger.error("Failed to convert the agent intent to executable form.") + return None def handle_act_intent(self, session: SimBotSession) -> Optional[SimBotAction]: """Generate an action when we want to just act.""" @@ -109,7 +104,6 @@ def _get_action_intent_handler( return switcher[intent.type] - @tracer.start_as_current_span("Predict from template matching") def _predict_action_from_template_matching(self, session: SimBotSession) -> SimBotAction: """Generate an action from the raw-text matching templater.""" if not session.current_turn.speech: @@ -138,25 +132,21 @@ def _predict_action_from_template_matching(self, session: SimBotSession) -> SimB ) raise err - @tracer.start_as_current_span("Predict from Policy") def _predict_action_from_emma_policy(self, session: SimBotSession) -> Optional[SimBotAction]: """Generate an action from the EMMA policy client.""" - with tracer.start_as_current_span("Get turns within interaction window"): - if session.current_turn.utterance_from_queue: - turns_within_interaction_window = session.get_turns_since_last_user_utterance() - else: - turns_within_interaction_window = session.get_turns_within_interaction_window() - - with tracer.start_as_current_span("Build environment state history"): - environment_state_history = session.get_environment_state_history_from_turns( - turns_within_interaction_window, - self._features_client.get_features, - ) + if session.current_turn.utterance_from_queue: + turns_within_interaction_window = session.get_turns_since_last_user_utterance() + else: + turns_within_interaction_window = session.get_turns_within_interaction_window() + + environment_state_history = session.get_environment_state_history_from_turns( + turns_within_interaction_window, + self._features_client.get_features, + ) - with tracer.start_as_current_span("Get dialogue history"): - dialogue_history = session.get_dialogue_history_from_session_turns( - turns_within_interaction_window - ) + dialogue_history = session.get_dialogue_history_from_session_turns( + turns_within_interaction_window + ) raw_action_prediction = self._action_predictor_client.generate( dialogue_history=dialogue_history, @@ -212,7 +202,6 @@ def _should_execute_action(self, session: SimBotSession, action: SimBotAction) - ) return not should_skip_goto_object_action - @tracer.start_as_current_span("Try replace mask with placeholder") def _replace_mask_with_placeholder( self, session: SimBotSession, action: SimBotAction ) -> SimBotAction: diff --git a/src/emma_experience_hub/pipelines/simbot/agent_language_generation.py b/src/emma_experience_hub/pipelines/simbot/agent_language_generation.py index fcc01cf9..c90f8905 100644 --- a/src/emma_experience_hub/pipelines/simbot/agent_language_generation.py +++ b/src/emma_experience_hub/pipelines/simbot/agent_language_generation.py @@ -1,7 +1,6 @@ from typing import Optional from loguru import logger -from opentelemetry import trace from emma_experience_hub.constants.simbot import ACTION_SYNONYMS_FOR_GENERATION, ROOM_SYNONYNMS from emma_experience_hub.datamodels.simbot import ( @@ -17,9 +16,6 @@ ) -tracer = trace.get_tracer(__name__) - - class SimBotAgentLanguageGenerationPipeline: """Generate language for the agent to say to the user.""" @@ -33,11 +29,9 @@ def __init__(self, *, prevent_default_response_as_lightweight: bool = True) -> N def run(self, session: SimBotSession) -> Optional[SimBotDialogAction]: """Generate an utterance to send back to the user.""" - with tracer.start_as_current_span("Get feedback state"): - feedback_state = session.to_feedback_state() + feedback_state = session.to_feedback_state() - with tracer.start_as_current_span("Choose utterance"): - matching_rule = self._feedback_parser(feedback_state) + matching_rule = self._feedback_parser(feedback_state) action = self._generate_dialog_action(matching_rule, feedback_state) return action diff --git a/src/emma_experience_hub/pipelines/simbot/anticipator.py b/src/emma_experience_hub/pipelines/simbot/anticipator.py index 988ca5fa..23e35a73 100644 --- a/src/emma_experience_hub/pipelines/simbot/anticipator.py +++ b/src/emma_experience_hub/pipelines/simbot/anticipator.py @@ -2,7 +2,6 @@ import torch from loguru import logger -from opentelemetry import trace from emma_common.datamodels import SpeakerRole from emma_experience_hub.api.clients.simbot import SimBotFeaturesClient, SimBotHacksClient @@ -13,9 +12,6 @@ from emma_experience_hub.functions.simbot.special_tokens import extract_index_from_special_token -tracer = trace.get_tracer(__name__) - - class SimbotAnticipatorPipeline: """Generate a plan of instructions and appends them to the utterance queue.""" diff --git a/src/emma_experience_hub/pipelines/simbot/find_object.py b/src/emma_experience_hub/pipelines/simbot/find_object.py index 5a184020..4cec830e 100644 --- a/src/emma_experience_hub/pipelines/simbot/find_object.py +++ b/src/emma_experience_hub/pipelines/simbot/find_object.py @@ -1,7 +1,6 @@ from typing import Optional from loguru import logger -from opentelemetry import trace from emma_common.datamodels import EmmaExtractedFeatures, EnvironmentStateTurn from emma_experience_hub.api.clients.simbot import ( @@ -31,9 +30,6 @@ from emma_experience_hub.parsers.simbot import SimBotVisualGroundingOutputParser -tracer = trace.get_tracer(__name__) - - class SimBotFindObjectPipeline: """Pipeline to allow the agent to find object within the arena. @@ -96,8 +92,7 @@ def run(self, session: SimBotSession) -> Optional[SimBotAction]: # noqa: WPS212 """Handle the search through the environment.""" if self._should_start_new_search(session): logger.debug("Preparing search plan...") - with tracer.start_as_current_span("Building search plan"): - search_plan = self._build_search_plan(session) + search_plan = self._build_search_plan(session) if not search_plan: return None @@ -217,7 +212,6 @@ def _next_action_is_dummy(self, session: SimBotSession) -> bool: head_action = session.current_state.find_queue.queue[0] return head_action.type == SimBotActionType.MoveForward - @tracer.start_as_current_span("Trying to find object from visuals") def _get_object_from_turn( self, session: SimBotSession, @@ -236,15 +230,13 @@ def _get_object_from_turn( inventory_entity=session.current_state.inventory.entity, ) - with tracer.start_as_current_span("Parse visual grounding output"): - scene_object_tokens = self._visual_grounding_output_parser(raw_visual_grounding_output) + scene_object_tokens = self._visual_grounding_output_parser(raw_visual_grounding_output) if scene_object_tokens is None: raise AssertionError("Unable to get scene object tokens from the model output.") return scene_object_tokens - @tracer.start_as_current_span("Create actions for the found object") def _create_actions_for_found_object( self, session: SimBotSession, @@ -390,7 +382,6 @@ def _create_action_from_scene_object( ), ) - @tracer.start_as_current_span("Try get next action from plan") def _get_next_action_from_plan(self, session: SimBotSession) -> Optional[SimBotAction]: """If the model did not find the object, get the next action from the search plan.""" next_action: Optional[SimBotAction] = None diff --git a/src/emma_experience_hub/pipelines/simbot/user_intent_extraction.py b/src/emma_experience_hub/pipelines/simbot/user_intent_extraction.py index 46d8071e..9862066c 100644 --- a/src/emma_experience_hub/pipelines/simbot/user_intent_extraction.py +++ b/src/emma_experience_hub/pipelines/simbot/user_intent_extraction.py @@ -2,7 +2,6 @@ from typing import Optional from loguru import logger -from opentelemetry import trace from emma_experience_hub.api.clients import ConfirmationResponseClassifierClient from emma_experience_hub.api.clients.simbot import SimBotQAIntentClient @@ -17,9 +16,6 @@ from emma_experience_hub.parsers.simbot import SimBotQAOutputParser -tracer = trace.get_tracer(__name__) - - class SimBotUserIntentExtractionPipeline: """Determine what the user wants us to do. @@ -73,7 +69,6 @@ def run(self, session: SimBotSession) -> Optional[SimBotUserIntentType]: # If nothing else, just let the model try to act. return SimBotIntentType.act - @tracer.start_as_current_span("Check for user QA") def check_for_user_qa(self, utterance: str) -> Optional[SimBotUserQAType]: """Check if the user is asking us a question or are unparsable utterances.""" raw_user_qa_intent = self._qa_intent_client.process_utterance(utterance) diff --git a/src/emma_experience_hub/pipelines/simbot/user_utterance_verification.py b/src/emma_experience_hub/pipelines/simbot/user_utterance_verification.py index a67c3436..03c6a6f8 100644 --- a/src/emma_experience_hub/pipelines/simbot/user_utterance_verification.py +++ b/src/emma_experience_hub/pipelines/simbot/user_utterance_verification.py @@ -1,7 +1,6 @@ from typing import Optional from loguru import logger -from opentelemetry import trace from emma_experience_hub.api.clients import OutOfDomainDetectorClient, ProfanityFilterClient from emma_experience_hub.datamodels.simbot import ( @@ -12,9 +11,6 @@ from emma_experience_hub.parsers import Parser -tracer = trace.get_tracer(__name__) - - class SimBotUserUtteranceVerificationPipeline: """Validate the utterance from the user is valid. @@ -64,7 +60,6 @@ def run( # noqa: WPS212 # Utterance is not invalid return None - @tracer.start_as_current_span("Check for low ASR confidence") def _utterance_is_low_asr_confidence( self, speech_recognition_payload: SimBotSpeechRecognitionPayload ) -> bool: @@ -72,7 +67,6 @@ def _utterance_is_low_asr_confidence( # If True, then it is ABOVE the threshold and it is NOT low ASR return not self._low_asr_confidence_detector(speech_recognition_payload) - @tracer.start_as_current_span("Check for profanity") def _utterance_contains_profanity( self, speech_recognition_payload: SimBotSpeechRecognitionPayload ) -> bool: @@ -84,7 +78,6 @@ def _utterance_contains_profanity( logger.exception("Unable to check for profanity.") raise err - @tracer.start_as_current_span("Check for out of domain") def _utterance_is_out_of_domain( self, speech_recognition_payload: SimBotSpeechRecognitionPayload ) -> bool: @@ -98,14 +91,12 @@ def _utterance_is_out_of_domain( logger.exception("Unable to check for out of domain utterance.") raise err - @tracer.start_as_current_span("Check for only wake word") def _utterance_only_contains_wake_word( self, speech_recognition_payload: SimBotSpeechRecognitionPayload ) -> bool: """Detect whether the utterance only contains the wake word or not.""" return all(token.is_wake_word for token in speech_recognition_payload.tokens) - @tracer.start_as_current_span("Check for empty utterance") def _utterance_is_empty( self, speech_recognition_payload: SimBotSpeechRecognitionPayload ) -> bool: From 818dd4de73f5658a19f3737b5ef00c8f5dbfa01f Mon Sep 17 00:00:00 2001 From: MalvinaNikandrou Date: Sat, 2 Dec 2023 12:01:02 +0000 Subject: [PATCH 7/8] Add storage/fixture/simbot dirs --- storage/fixtures/simbot/features/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 storage/fixtures/simbot/features/.gitkeep diff --git a/storage/fixtures/simbot/features/.gitkeep b/storage/fixtures/simbot/features/.gitkeep deleted file mode 100644 index e69de29b..00000000 From 42abeff52a03162f6638e4b69ee066e58fddac56 Mon Sep 17 00:00:00 2001 From: MalvinaNikandrou Date: Sat, 2 Dec 2023 12:14:21 +0000 Subject: [PATCH 8/8] Create fixtures cache dirs if they don't exist --- tests/fixtures/paths.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/fixtures/paths.py b/tests/fixtures/paths.py index b358cebd..d905d60f 100644 --- a/tests/fixtures/paths.py +++ b/tests/fixtures/paths.py @@ -37,13 +37,17 @@ def auxiliary_metadata_dir(fixtures_root: Path) -> Path: @fixture(scope="session") def auxiliary_metadata_cache_dir(fixtures_root: Path) -> Path: """Path to the auxilary the metadata cache.""" - return fixtures_root.joinpath("simbot/metadata_cache_dir/") + cache_dir = fixtures_root.joinpath("simbot/metadata_cache_dir/") + cache_dir.mkdir(exist_ok=True) + return cache_dir @fixture(scope="session") def features_cache_dir(fixtures_root: Path) -> Path: """Path to the auxilary features cache.""" - return fixtures_root.joinpath("simbot/features/") + features_cache_dir = fixtures_root.joinpath("simbot/features/") + features_cache_dir.mkdir(exist_ok=True) + return features_cache_dir @fixture(scope="session")