Spaces:
Running
on
Zero
Running
on
Zero
Bellok
committed on
Commit
·
e26c40f
1
Parent(s):
5e6fb91
chore-debug: add debug prints to inspect query and document embeddings
Browse files
During debugging, added stderr prints that output the first 5 values of the query embedding and compare the first 10 values of the first two document embeddings, to check for duplicates or other issues in the retrieval process.
- warbler_cda/retrieval_api.py +11 -1
warbler_cda/retrieval_api.py
CHANGED
|
@@ -396,9 +396,10 @@ class RetrievalAPI:
|
|
| 396 |
|
| 397 |
# If embedding provider available, use it
|
| 398 |
if self.embedding_provider:
|
| 399 |
-
|
| 400 |
try:
|
| 401 |
query_embedding = self.embedding_provider.embed_text(query.semantic_query)
|
|
|
|
| 402 |
except OSError:
|
| 403 |
return results
|
| 404 |
|
|
@@ -500,6 +501,15 @@ class RetrievalAPI:
|
|
| 500 |
embeddings_list.append(doc_data["embedding"])
|
| 501 |
doc_ids.append(doc_id)
|
| 502 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
if not embeddings_list:
|
| 504 |
return self._search_context_store_keyword(query)
|
| 505 |
|
|
|
|
| 396 |
|
| 397 |
# If embedding provider available, use it
|
| 398 |
if self.embedding_provider:
|
| 399 |
+
# Get query embedding
|
| 400 |
try:
|
| 401 |
query_embedding = self.embedding_provider.embed_text(query.semantic_query)
|
| 402 |
+
print(f"DEBUG: query_embedding first 5 values: {query_embedding[:5] if query_embedding else 'None'}", file=sys.stderr)
|
| 403 |
except OSError:
|
| 404 |
return results
|
| 405 |
|
|
|
|
| 501 |
embeddings_list.append(doc_data["embedding"])
|
| 502 |
doc_ids.append(doc_id)
|
| 503 |
|
| 504 |
+
# DEBUG - check first few embeddings
|
| 505 |
+
if embeddings_list:
|
| 506 |
+
import sys
|
| 507 |
+
print(f"DEBUG: First document embedding first 5: {embeddings_list[0][:5] if len(embeddings_list) > 0 else 'None'}", file=sys.stderr)
|
| 508 |
+
if len(embeddings_list) > 1:
|
| 509 |
+
print(f"DEBUG: Second document embedding first 5: {embeddings_list[1][:5] if len(embeddings_list) > 1 else 'None'}", file=sys.stderr)
|
| 510 |
+
same = embeddings_list[0][:10] == embeddings_list[1][:10] # Compare first 10 values
|
| 511 |
+
print(f"DEBUG: First two embeddings identical? {same}", file=sys.stderr)
|
| 512 |
+
|
| 513 |
if not embeddings_list:
|
| 514 |
return self._search_context_store_keyword(query)
|
| 515 |
|