Refa: knowledge_graph chunk method is deprecated (#7220)

yongtenglei · web-flow · commit 94181a990b95 · 2025-04-23T13:01:46.000+08:00
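### What problem does this PR solve? The `knowledge_graph` chunk method is deprecated and should no longer be used (see #7184). This PR removes it from the dataset SDK's accepted `chunk_method` values, the default parser list in `api/settings.py`, the HTTP and Python API references, and the related tests. ### Type of change - [x] Refactoring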
diff --git a/api/apps/sdk/dataset.py b/api/apps/sdk/dataset.py
@@ -69,7 +69,7 @@ def create(tenant_id):
             chunk_method:
               type: string
               enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
-                     "presentation", "picture", "one", "knowledge_graph", "email", "tag"
+                     "presentation", "picture", "one", "email", "tag"
                      ]
               description: Chunking method.
             parser_config:
@@ -105,7 +105,6 @@ def create(tenant_id):
         "presentation",
         "picture",
         "one",
-        "knowledge_graph",
         "email",
         "tag"
     ]
@@ -315,7 +314,7 @@ def update(tenant_id, dataset_id):
             chunk_method:
               type: string
               enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
-                     "presentation", "picture", "one", "knowledge_graph", "email", "tag"
+                     "presentation", "picture", "one", "email", "tag"
                      ]
               description: Updated chunking method.
             parser_config:
@@ -353,7 +352,6 @@ def update(tenant_id, dataset_id):
         "presentation",
         "picture",
         "one",
-        "knowledge_graph",
         "email",
         "tag"
     ]
diff --git a/api/settings.py b/api/settings.py
@@ -109,7 +109,7 @@ def init_settings():
     API_KEY = LLM.get("api_key", "")
     PARSERS = LLM.get(
         "parsers",
-        "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email,tag:Tag")
+        "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag")
 
     HOST_IP = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1")
     HOST_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port")
diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md
@@ -393,7 +393,5 @@ curl --request POST \
   - `"presentation"`: Presentation
   - `"picture"`: Picture
   - `"one"`: One
-  - `"knowledge_graph"`: Knowledge Graph  
-    Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
   - `"email"`: Email
 
@@ -409,10 +408,6 @@ curl --request POST \
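-  - If `"chunk_method"` is `"qa"`, `"manuel"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:  
+  - If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:  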
     - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
   - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
-  - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:  
-    - `"chunk_token_count"`: Defaults to `128`.
-    - `"delimiter"`: Defaults to `"\n"`.
-    - `"entity_types"`: Defaults to `["organization","person","location","event","time"]`
 
 #### Response
 
@@ -573,8 +568,6 @@ curl --request PUT \
   - `"picture"`: Picture
-  - `"one"`:One
+  - `"one"`: One
   - `"email"`: Email
-  - `"knowledge_graph"`: Knowledge Graph  
-    Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
 
 #### Response
 
@@ -655,7 +648,7 @@ Success:
             "id": "6e211ee0723611efa10a0242ac120007",
             "language": "English",
             "name": "mysql",
-            "chunk_method": "knowledge_graph",
+            "chunk_method": "naive",
             "parser_config": {
                 "chunk_token_num": 8192,
                 "delimiter": "\\n",
@@ -841,10 +834,6 @@ curl --request PUT \
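-  - If `"chunk_method"` is `"qa"`, `"manuel"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
+  - If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute: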
     - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
   - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
-  - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
-    - `"chunk_token_count"`: Defaults to `128`.
-    - `"delimiter"`: Defaults to `"\n"`.
-    - `"entity_types"`: Defaults to `["organization","person","location","event","time"]`
 
 #### Response
 
diff --git a/docs/references/python_api_reference.md b/docs/references/python_api_reference.md
@@ -145,8 +145,6 @@ The chunking method of the dataset to create. Available options:
 - `"presentation"`: Presentation
 - `"picture"`: Picture
 - `"one"`: One
-- `"knowledge_graph"`: Knowledge Graph  
-  Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
 - `"email"`: Email
 
 ##### parser_config
@@ -398,8 +396,6 @@ A dictionary representing the attributes to update, with the following keys:
   - `"presentation"`: Presentation
   - `"picture"`: Picture
   - `"one"`: One
-  - `"knowledge_graph"`: Knowledge Graph  
-    Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
   - `"email"`: Email
 - `"parser_config"`: `dict[str, Any]` The parsing configuration for the document. Its attributes vary based on the selected `"chunk_method"`:
   - `"chunk_method"`=`"naive"`:  
diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py b/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py
@@ -122,7 +122,6 @@ def test_permission(self, get_http_api_auth, name, permission, expected_code):
             ("presentation", "presentation", 0),
             ("picture", "picture", 0),
             ("one", "one", 0),
-            ("picknowledge_graphture", "knowledge_graph", 0),
             ("email", "email", 0),
             ("tag", "tag", 0),
             ("empty_chunk_method", "", 0),
diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py b/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
@@ -114,14 +114,13 @@ def test_embedding_model(self, get_http_api_auth, add_dataset_func, embedding_mo
             ("presentation", 0, ""),
             ("picture", 0, ""),
             ("one", 0, ""),
-            ("knowledge_graph", 0, ""),
             ("email", 0, ""),
             ("tag", 0, ""),
             ("", 0, ""),
             (
                 "other_chunk_method",
                 102,
-                "'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'knowledge_graph', 'email', 'tag']",
+                "'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'email', 'tag']",
             ),
         ],
     )
diff --git a/sdk/python/test/test_sdk_api/t_dataset.py b/sdk/python/test/test_sdk_api/t_dataset.py
@@ -38,17 +38,15 @@ def test_create_dataset_with_duplicated_name(get_api_key_fixture):
 def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
-    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
-                           "knowledge_graph", "email"]
+    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email"]
     random_chunk_method = random.choice(valid_chunk_methods)
     rag.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=random_chunk_method)
 
 
 def test_create_dataset_with_invalid_parameter(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
-    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
-                           "knowledge_graph", "email", "tag"]
+    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email", "tag"]
     chunk_method = "invalid_chunk_method"
     with pytest.raises(Exception) as exc_info:
         rag.create_dataset("test_create_dataset_with_invalid_chunk_method", chunk_method=chunk_method)