Rename parsers to be consistent (#8106)

tryptofanik · Manul from Pathway · commit 3e164c43f106 · 2025-01-30T08:33:23.000Z
GitOrigin-RevId: 94d099a84e913a56289b66baa8b6a1f5a7a2fd41
diff --git a/cookbooks/self-rag-agents/pathway_deploy_langgraph_agents.ipynb b/cookbooks/self-rag-agents/pathway_deploy_langgraph_agents.ipynb
@@ -42,7 +42,7 @@
    "id": "9e055de9-723f-44e3-ad39-70cc5f8932bf",
    "metadata": {},
    "source": [
-    "Magic library is used for detecting file types in the `ParseUnstructured` module.\n",
+    "The `libmagic` library is used for detecting file types in the `UnstructuredParser` module.\n",
     "\n",
     "If you are running this notebook on **MacOS**, you can install it with:\n",
     "> `brew install libmagic`\n",
@@ -193,7 +193,7 @@
     "\n",
     "\n",
     "1. [Connectors](https://pathway.com/developers/user-guide/connect/pathway-connectors): Use Pathway’s file reader to ingest all text files under the `DATA_PATH`.\n",
-    "2. [Parsers](https://pathway.com/developers/api-docs/pathway-xpacks-llm/parsers): Utilize the ParseUnstructured to parse the documents. This parser supports multiple file types, including PDF, DOCX, and PPTX.\n",
+    "2. [Parsers](https://pathway.com/developers/api-docs/pathway-xpacks-llm/parsers): Utilize the UnstructuredParser to parse the documents. This parser supports multiple file types, including PDF, DOCX, and PPTX.\n",
     "3. [Text Splitters](https://pathway.com/developers/api-docs/pathway-xpacks-llm/splitters): Split the document content into chunks.\n",
     "4. [Embedders](https://pathway.com/developers/api-docs/pathway-xpacks-llm/embedders): Use OpenAI API for embeddings."
    ]
@@ -242,7 +242,7 @@
     "sources = [folder]\n",
     "\n",
     "# define the document processing steps\n",
-    "parser = parsers.ParseUnstructured()\n",
+    "parser = parsers.UnstructuredParser()\n",
     "\n",
     "text_splitter = splitters.TokenCountSplitter(min_tokens=150, max_tokens=450)\n",
     "\n",
diff --git a/cookbooks/self-rag-agents/pathway_langgraph_agentic_rag.ipynb b/cookbooks/self-rag-agents/pathway_langgraph_agentic_rag.ipynb
@@ -31,7 +31,7 @@
    "id": "f42f3015-33e6-48f4-827a-1e44541507cd",
    "metadata": {},
    "source": [
-    "Magic library is used for detecting file types in the `ParseUnstructured` module.\n",
+    "The `libmagic` library is used for detecting file types in the `UnstructuredParser` module.\n",
     "\n",
     "If you are running this notebook on **MacOS**, you can install it with:\n",
     "> `brew install libmagic`\n",
@@ -193,7 +193,7 @@
     "\n",
     "\n",
     "1. [Connectors](https://pathway.com/developers/user-guide/connect/pathway-connectors): Use Pathway’s file reader to ingest all text files under the `DATA_PATH`.\n",
-    "2. [Parsers](https://pathway.com/developers/api-docs/pathway-xpacks-llm/parsers): Utilize the ParseUnstructured to parse the documents. This parser supports multiple file types, including PDF, DOCX, and PPTX.\n",
+    "2. [Parsers](https://pathway.com/developers/api-docs/pathway-xpacks-llm/parsers): Utilize the UnstructuredParser to parse the documents. This parser supports multiple file types, including PDF, DOCX, and PPTX.\n",
     "3. [Text Splitters](https://pathway.com/developers/api-docs/pathway-xpacks-llm/splitters): Split the document content into chunks.\n",
     "4. [Embedders](https://pathway.com/developers/api-docs/pathway-xpacks-llm/embedders): Use OpenAI API for embeddings.\n",
     "5. [VectorStore](https://pathway.com/developers/api-docs/pathway-xpacks-llm/vectorstore): Orchestrates all the above modules."
@@ -255,7 +255,7 @@
     "sources = [folder]\n",
     "\n",
     "# define the document processing steps\n",
-    "parser = parsers.ParseUnstructured()\n",
+    "parser = parsers.UnstructuredParser()\n",
     "\n",
     "text_splitter = splitters.TokenCountSplitter(min_tokens=150, max_tokens=450)\n",
     "\n",
diff --git a/examples/pipelines/adaptive-rag/app.yaml b/examples/pipelines/adaptive-rag/app.yaml
@@ -39,7 +39,7 @@ $embedder: !pw.xpacks.llm.embedders.OpenAIEmbedder
 $splitter: !pw.xpacks.llm.splitters.TokenCountSplitter
   max_tokens: 400
 
-$parser: !pw.xpacks.llm.parsers.ParseUnstructured
+$parser: !pw.xpacks.llm.parsers.UnstructuredParser
   cache_strategy: !pw.udfs.DefaultCache
 
 $retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory
diff --git a/examples/pipelines/demo-document-indexing/app.yaml b/examples/pipelines/demo-document-indexing/app.yaml
@@ -34,7 +34,7 @@ $embedder: !pw.xpacks.llm.embedders.SentenceTransformerEmbedder
 $splitter: !pw.xpacks.llm.splitters.TokenCountSplitter
   max_tokens: 400
 
-$parser: !pw.xpacks.llm.parsers.ParseUnstructured
+$parser: !pw.xpacks.llm.parsers.UnstructuredParser
   cache_strategy: !pw.udfs.DefaultCache
 
 $retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory
diff --git a/examples/pipelines/demo-question-answering/app.yaml b/examples/pipelines/demo-question-answering/app.yaml
@@ -39,7 +39,7 @@ $embedder: !pw.xpacks.llm.embedders.OpenAIEmbedder
 $splitter: !pw.xpacks.llm.splitters.TokenCountSplitter
   max_tokens: 400
 
-$parser: !pw.xpacks.llm.parsers.ParseUnstructured
+$parser: !pw.xpacks.llm.parsers.UnstructuredParser
   cache_strategy: !pw.udfs.DefaultCache
 
 $retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory
diff --git a/examples/pipelines/drive_alert/app.py b/examples/pipelines/drive_alert/app.py
@@ -35,7 +35,7 @@
 from pathway.stdlib.ml.index import KNNIndex
 from pathway.xpacks.llm.embedders import OpenAIEmbedder
 from pathway.xpacks.llm.llms import OpenAIChat, prompt_chat_single_qa
-from pathway.xpacks.llm.parsers import ParseUnstructured
+from pathway.xpacks.llm.parsers import UnstructuredParser
 from pathway.xpacks.llm.splitters import TokenCountSplitter
 
 # To use advanced features with Pathway Scale, get your free license key from
@@ -165,7 +165,7 @@ def run(
         service_user_credentials_file=service_user_credentials_file,
         refresh_interval=30,  # interval between fetch operations in seconds, lower this for more responsiveness
     )
-    parser = ParseUnstructured()
+    parser = UnstructuredParser()
     documents = files.select(texts=parser(pw.this.data))
     documents = documents.flatten(pw.this.texts)
     documents = documents.select(texts=pw.this.texts[0])
diff --git a/examples/pipelines/private-rag/app.yaml b/examples/pipelines/private-rag/app.yaml
@@ -46,7 +46,7 @@ $embedder: !pw.xpacks.llm.embedders.SentenceTransformerEmbedder
 $splitter: !pw.xpacks.llm.splitters.TokenCountSplitter
   max_tokens: 400
 
-$parser: !pw.xpacks.llm.parsers.ParseUnstructured
+$parser: !pw.xpacks.llm.parsers.UnstructuredParser
   cache_strategy: !pw.udfs.DefaultCache
 
 $retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory
diff --git a/examples/pipelines/unstructured_to_sql_on_the_fly/app.py b/examples/pipelines/unstructured_to_sql_on_the_fly/app.py
@@ -65,7 +65,7 @@
 import tiktoken
 from pathway.stdlib.utils.col import unpack_col
 from pathway.xpacks.llm.llms import OpenAIChat, prompt_chat_single_qa
-from pathway.xpacks.llm.parsers import ParseUnstructured
+from pathway.xpacks.llm.parsers import UnstructuredParser
 
 # To use advanced features with Pathway Scale, get your free license key from
 # https://pathway.com/features and paste it below.
@@ -302,7 +302,7 @@ def run(
         data_dir,
         format="binary",
     )
-    parser = ParseUnstructured()
+    parser = UnstructuredParser()
     unstructured_documents = files.select(texts=parser(pw.this.data)).select(
         texts=strip_metadata(pw.this.texts)
     )