Skip to content

Commit 94181a9

Browse files
authored
Refa: knowledge_graph chunk method is deprecated (#7220)
### What problem does this PR solve?

The knowledge_graph chunk method is deprecated and should no longer be used. #7184.

### Type of change

- [x] Refactoring
1 parent 03672df commit 94181a9

File tree

7 files changed

+7
-28
lines changed

7 files changed

+7
-28
lines changed

api/apps/sdk/dataset.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def create(tenant_id):
6969
chunk_method:
7070
type: string
7171
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
72-
"presentation", "picture", "one", "knowledge_graph", "email", "tag"
72+
"presentation", "picture", "one", "email", "tag"
7373
]
7474
description: Chunking method.
7575
parser_config:
@@ -105,7 +105,6 @@ def create(tenant_id):
105105
"presentation",
106106
"picture",
107107
"one",
108-
"knowledge_graph",
109108
"email",
110109
"tag"
111110
]
@@ -315,7 +314,7 @@ def update(tenant_id, dataset_id):
315314
chunk_method:
316315
type: string
317316
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
318-
"presentation", "picture", "one", "knowledge_graph", "email", "tag"
317+
"presentation", "picture", "one", "email", "tag"
319318
]
320319
description: Updated chunking method.
321320
parser_config:
@@ -353,7 +352,6 @@ def update(tenant_id, dataset_id):
353352
"presentation",
354353
"picture",
355354
"one",
356-
"knowledge_graph",
357355
"email",
358356
"tag"
359357
]

api/settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def init_settings():
109109
API_KEY = LLM.get("api_key", "")
110110
PARSERS = LLM.get(
111111
"parsers",
112-
"naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email,tag:Tag")
112+
"naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag")
113113

114114
HOST_IP = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1")
115115
HOST_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port")

docs/references/http_api_reference.md

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,6 @@ curl --request POST \
393393
- `"presentation"`: Presentation
394394
- `"picture"`: Picture
395395
- `"one"`: One
396-
- `"knowledge_graph"`: Knowledge Graph
397396
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
398397
- `"email"`: Email
399398

@@ -409,10 +408,6 @@ curl --request POST \
409408
- If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
410409
- `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
411410
- If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
412-
- If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
413-
- `"chunk_token_count"`: Defaults to `128`.
414-
- `"delimiter"`: Defaults to `"\n"`.
415-
- `"entity_types"`: Defaults to `["organization","person","location","event","time"]`
416411

417412
#### Response
418413

@@ -573,8 +568,6 @@ curl --request PUT \
573568
- `"picture"`: Picture
574569
- `"one"`: One
575570
- `"email"`: Email
576-
- `"knowledge_graph"`: Knowledge Graph
577-
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
578571

579572
#### Response
580573

@@ -655,7 +648,7 @@ Success:
655648
"id": "6e211ee0723611efa10a0242ac120007",
656649
"language": "English",
657650
"name": "mysql",
658-
"chunk_method": "knowledge_graph",
651+
"chunk_method": "naive",
659652
"parser_config": {
660653
"chunk_token_num": 8192,
661654
"delimiter": "\\n",
@@ -841,10 +834,6 @@ curl --request PUT \
841834
- If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
842835
- `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
843836
- If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
844-
- If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
845-
- `"chunk_token_count"`: Defaults to `128`.
846-
- `"delimiter"`: Defaults to `"\n"`.
847-
- `"entity_types"`: Defaults to `["organization","person","location","event","time"]`
848837

849838
#### Response
850839

docs/references/python_api_reference.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,6 @@ The chunking method of the dataset to create. Available options:
145145
- `"presentation"`: Presentation
146146
- `"picture"`: Picture
147147
- `"one"`: One
148-
- `"knowledge_graph"`: Knowledge Graph
149-
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
150148
- `"email"`: Email
151149

152150
##### parser_config
@@ -398,8 +396,6 @@ A dictionary representing the attributes to update, with the following keys:
398396
- `"presentation"`: Presentation
399397
- `"picture"`: Picture
400398
- `"one"`: One
401-
- `"knowledge_graph"`: Knowledge Graph
402-
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
403399
- `"email"`: Email
404400
- `"parser_config"`: `dict[str, Any]` The parsing configuration for the document. Its attributes vary based on the selected `"chunk_method"`:
405401
- `"chunk_method"`=`"naive"`:

sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ def test_permission(self, get_http_api_auth, name, permission, expected_code):
122122
("presentation", "presentation", 0),
123123
("picture", "picture", 0),
124124
("one", "one", 0),
125-
("picknowledge_graphture", "knowledge_graph", 0),
126125
("email", "email", 0),
127126
("tag", "tag", 0),
128127
("empty_chunk_method", "", 0),

sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,14 +114,13 @@ def test_embedding_model(self, get_http_api_auth, add_dataset_func, embedding_mo
114114
("presentation", 0, ""),
115115
("picture", 0, ""),
116116
("one", 0, ""),
117-
("knowledge_graph", 0, ""),
118117
("email", 0, ""),
119118
("tag", 0, ""),
120119
("", 0, ""),
121120
(
122121
"other_chunk_method",
123122
102,
124-
"'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'knowledge_graph', 'email', 'tag']",
123+
"'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'email', 'tag']",
125124
),
126125
],
127126
)

sdk/python/test/test_sdk_api/t_dataset.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,15 @@ def test_create_dataset_with_duplicated_name(get_api_key_fixture):
3838
def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
3939
API_KEY = get_api_key_fixture
4040
rag = RAGFlow(API_KEY, HOST_ADDRESS)
41-
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
42-
"knowledge_graph", "email"]
41+
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email"]
4342
random_chunk_method = random.choice(valid_chunk_methods)
4443
rag.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=random_chunk_method)
4544

4645

4746
def test_create_dataset_with_invalid_parameter(get_api_key_fixture):
4847
API_KEY = get_api_key_fixture
4948
rag = RAGFlow(API_KEY, HOST_ADDRESS)
50-
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
51-
"knowledge_graph", "email", "tag"]
49+
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email", "tag"]
5250
chunk_method = "invalid_chunk_method"
5351
with pytest.raises(Exception) as exc_info:
5452
rag.create_dataset("test_create_dataset_with_invalid_chunk_method", chunk_method=chunk_method)

0 commit comments

Comments
 (0)