TypeError: Query column vector must be a vector. Got list<item: double>. #1378

bhupender101 · 2024-11-07T09:10:44Z

Do you need to file an issue?

I have searched the existing issues and this bug is not already filed.
My model is hosted on OpenAI or Azure. If not, please look at the "model providers" issue and don't file a new one here.
I believe this is a legitimate bug, not just a question. If this is a question, please use the Discussions area.

Describe the bug

TypeError Traceback (most recent call last)
Cell In[11], line 1
----> 1 graph.local_search(query="ok")

File ~/Desktop/Genai/bloomfire-backend/rm/msgraph.py:252, in MSGraphRAGSearch.local_search(self, query)
242 def local_search(self, query: str):
243 """
244 Perform a local search with the given query.
245
(...)
250 Dictionary containing search results and metadata
251 """
--> 252 result = self.local_search_engine.search(query=query)
253 return {
254 "response": convert_response_to_string(result.response),
255 "context_data": result.context_data,
(...)
259 "prompt_tokens": result.prompt_tokens,
260 }

File ~/miniconda3/envs/genai/lib/python3.11/site-packages/graphrag/query/structured_search/local_search/search.py:158, in LocalSearch.search(self, query, conversation_history, **kwargs)
156 start_time = time.time()
157 search_prompt = ""
--> 158 context_text, context_records = self.context_builder.build_context(
159 query=query,
160 conversation_history=conversation_history,
161 **kwargs,
162 **self.context_builder_params,
163 )
164 log.info("GENERATE ANSWER: %d. QUERY: %s", start_time, query)
165 try:

File ~/miniconda3/envs/genai/lib/python3.11/site-packages/graphrag/query/structured_search/local_search/mixed_context.py:140, in LocalSearchMixedContext.build_context(self, query, conversation_history, include_entity_names, exclude_entity_names, conversation_history_max_turns, conversation_history_user_turns_only, max_tokens, text_unit_prop, community_prop, top_k_mapped_entities, top_k_relationships, include_community_rank, include_entity_rank, rank_description, include_relationship_weight, relationship_ranking_attribute, return_candidate_context, use_community_summary, min_community_rank, community_context_name, column_delimiter, **kwargs)
135 pre_user_questions = "\n".join(
136 conversation_history.get_user_turns(conversation_history_max_turns)
137 )
138 query = f"{query}\n{pre_user_questions}"
--> 140 selected_entities = map_query_to_entities(
141 query=query,
142 text_embedding_vectorstore=self.entity_text_embeddings,
143 text_embedder=self.text_embedder,
144 all_entities_dict=self.entities,
145 embedding_vectorstore_key=self.embedding_vectorstore_key,
146 include_entity_names=include_entity_names,
147 exclude_entity_names=exclude_entity_names,
148 k=top_k_mapped_entities,
149 oversample_scaler=2,
150 )
152 # build context
153 final_context = list[str]()

File ~/miniconda3/envs/genai/lib/python3.11/site-packages/graphrag/query/context_builder/entity_extraction.py:57, in map_query_to_entities(query, text_embedding_vectorstore, text_embedder, all_entities_dict, embedding_vectorstore_key, include_entity_names, exclude_entity_names, k, oversample_scaler)
53 matched_entities = []
54 if query != "":
55 # get entities with highest semantic similarity to query
56 # oversample to account for excluded entities
---> 57 search_results = text_embedding_vectorstore.similarity_search_by_text(
58 text=query,
59 text_embedder=lambda t: text_embedder.embed(t),
60 k=k * oversample_scaler,
61 )
62 for result in search_results:
63 if embedding_vectorstore_key == EntityVectorStoreKey.ID and isinstance(
64 result.document.id, str
65 ):

File ~/miniconda3/envs/genai/lib/python3.11/site-packages/graphrag/vector_stores/lancedb.py:136, in LanceDBVectorStore.similarity_search_by_text(self, text, text_embedder, k, **kwargs)
134 query_embedding = text_embedder(text)
135 if query_embedding:
--> 136 return self.similarity_search_by_vector(query_embedding, k)
137 return []

File ~/miniconda3/envs/genai/lib/python3.11/site-packages/graphrag/vector_stores/lancedb.py:115, in LanceDBVectorStore.similarity_search_by_vector(self, query_embedding, k, **kwargs)
101 docs = (
102 self.document_collection.search(
103 query=query_embedding, vector_column_name="vector"
(...)
107 .to_list()
108 )
109 else:
110 docs = (
111 self.document_collection.search(
112 query=query_embedding, vector_column_name="vector"
113 )
114 .limit(k)
--> 115 .to_list()
116 )
117 return [
118 VectorStoreSearchResult(
119 document=VectorStoreDocument(
(...)
127 for doc in docs
128 ]

File ~/miniconda3/envs/genai/lib/python3.11/site-packages/lancedb/query.py:320, in LanceQueryBuilder.to_list(self)
312 def to_list(self) -> List[dict]:
313 """
314 Execute the query and return the results as a list of dictionaries.
315
(...)
318 fields are returned whether or not they're explicitly selected.
319 """
--> 320 return self.to_arrow().to_pylist()

File ~/miniconda3/envs/genai/lib/python3.11/site-packages/lancedb/query.py:647, in LanceVectorQueryBuilder.to_arrow(self)
638 def to_arrow(self) -> pa.Table:
639 """
640 Execute the query and return the results as an
641 Apache Arrow Table.
(...)
645 vector and the returned vectors.
646 """
--> 647 return self.to_batches().read_all()

File ~/miniconda3/envs/genai/lib/python3.11/site-packages/lancedb/query.py:678, in LanceVectorQueryBuilder.to_batches(self, batch_size)
664 vector = [v.tolist() for v in vector]
665 query = Query(
666 vector=vector,
667 filter=self._where,
(...)
676 offset=self._offset,
677 )
--> 678 result_set = self._table._execute_query(query, batch_size)
679 if self._reranker is not None:
680 rs_table = result_set.read_all()

File ~/miniconda3/envs/genai/lib/python3.11/site-packages/lancedb/table.py:1742, in LanceTable._execute_query(self, query, batch_size)
1733 if len(query.vector) > 0:
1734 nearest = {
1735 "column": query.vector_column,
1736 "q": query.vector,
(...)
1740 "refine_factor": query.refine_factor,
1741 }
-> 1742 return ds.scanner(
1743 columns=query.columns,
1744 limit=query.k,
1745 filter=query.filter,
1746 prefilter=query.prefilter,
1747 nearest=nearest,
1748 full_text_query=query.full_text_query,
1749 with_row_id=query.with_row_id,
1750 batch_size=batch_size,
1751 offset=query.offset,
1752 ).to_reader()

File ~/miniconda3/envs/genai/lib/python3.11/site-packages/lance/dataset.py:369, in LanceDataset.scanner(self, columns, filter, limit, offset, nearest, batch_size, batch_readahead, fragment_readahead, scan_in_order, fragments, full_text_query, prefilter, with_row_id, with_row_address, use_stats, fast_search, io_buffer_size)
367 builder = builder.full_text_search(**full_text_query)
368 if nearest is not None:
--> 369 builder = builder.nearest(**nearest)
370 return builder.to_scanner()

File ~/miniconda3/envs/genai/lib/python3.11/site-packages/lance/dataset.py:2449, in ScannerBuilder.nearest(self, column, q, k, metric, nprobes, refine_factor, use_index, ef)
2447 column_type = column_type.storage_type
2448 if not pa.types.is_fixed_size_list(column_type):
-> 2449 raise TypeError(
2450 f"Query column {column} must be a vector. Got {column_field.type}."
2451 )
2452 if len(q) != column_type.list_size:
2453 raise ValueError(
2454 f"Query vector size {len(q)} does not match index column size"
2455 f" {column_type.list_size}"
2456 )

TypeError: Query column vector must be a vector. Got list<item: double>.

Steps to reproduce

This is the error I get when running both local search and global search.

Expected Behavior

No response

GraphRAG Config Used

# Paste your config here

Logs and screenshots

No response

Additional Information

GraphRAG Version:
Operating System:
Python Version:
Related Issues:

MBRSL · 2024-11-08T09:18:42Z

Dup #1335

donaldNtjana · 2024-11-11T13:30:29Z

Loading 'create_final_entities.parquet' and creating the description_embedding column worked for me when doing 'local search'.

# Build the text to embed for each entity row as "<name>:<description>".
name_col = entity_embedding_df["name"]
description_col = entity_embedding_df["description"]
entity_embedding_df["name_description"] = name_col + ":" + description_col

# Embed the combined text and store the result in description_embedding.
entity_embedding_df["description_embedding"] = embed_text(
    entity_embedding_df["name_description"]
)

rivera2387 · 2024-11-12T17:49:24Z

@donaldNtjana What is embed_text? I'm getting an error saying embed_text is not defined.

donaldNtjana · 2024-11-12T18:04:41Z

embed_text would be the function to call to embed your column/text.

Use the same embedding model you specified in your config. I used the OpenAI embedding:

from graphrag.query.llm.oai.embedding import OpenAIEmbedding
def embed_text(column):
    """Embed every value of ``column`` with an OpenAI embedding client.

    Args:
        column: Object exposing ``.apply`` whose values are passed one at a
            time to the embedder (presumably a pandas Series of strings, such
            as a DataFrame column -- confirm against the caller).

    Returns:
        Whatever ``column.apply`` returns after calling the embedder's
        ``embed`` method on each value.
    """
    # Placeholder credentials: replace with the key, endpoint and embedding
    # model from your own configuration.
    embedder = OpenAIEmbedding(
        api_key="your_api_key",
        api_base="your_api_base",
        model="your_model",
    )

    # One embed call per value; the client is created once per invocation.
    return column.apply(embedder.embed)

bhupender101 added bug Something isn't working triage Default label assignment, indicates new issue needs reviewed by a maintainer labels Nov 7, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

TypeError: Query column vector must be a vector. Got list<item: double>. #1378

TypeError: Query column vector must be a vector. Got list<item: double>. #1378

bhupender101 commented Nov 7, 2024

MBRSL commented Nov 8, 2024

donaldNtjana commented Nov 11, 2024

rivera2387 commented Nov 12, 2024

donaldNtjana commented Nov 12, 2024 •

edited

Loading

TypeError: Query column vector must be a vector. Got list<item: double>. #1378

TypeError: Query column vector must be a vector. Got list<item: double>. #1378

Comments

bhupender101 commented Nov 7, 2024

Do you need to file an issue?

Describe the bug

Steps to reproduce

Expected Behavior

GraphRAG Config Used

Logs and screenshots

Additional Information

MBRSL commented Nov 8, 2024

donaldNtjana commented Nov 11, 2024

rivera2387 commented Nov 12, 2024

donaldNtjana commented Nov 12, 2024 • edited Loading

donaldNtjana commented Nov 12, 2024 •

edited

Loading