Bellok committed on
Commit
e26c40f
·
1 Parent(s): 5e6fb91

chore-debug: add debug prints to inspect query and document embeddings

Browse files

During debugging, added stderr prints that output the first 5 values of the query embedding and of each of the first two document embeddings, and that compare the first 10 values of those two document embeddings, to check for duplicate embeddings or other issues in the retrieval process.

Files changed (1) hide show
  1. warbler_cda/retrieval_api.py +11 -1
warbler_cda/retrieval_api.py CHANGED
@@ -396,9 +396,10 @@ class RetrievalAPI:
396
 
397
  # If embedding provider available, use it
398
  if self.embedding_provider:
399
- # Get query embedding
400
  try:
401
  query_embedding = self.embedding_provider.embed_text(query.semantic_query)
 
402
  except OSError:
403
  return results
404
 
@@ -500,6 +501,15 @@ class RetrievalAPI:
500
  embeddings_list.append(doc_data["embedding"])
501
  doc_ids.append(doc_id)
502
 
 
 
 
 
 
 
 
 
 
503
  if not embeddings_list:
504
  return self._search_context_store_keyword(query)
505
 
 
396
 
397
  # If embedding provider available, use it
398
  if self.embedding_provider:
399
+ # Get query embedding
400
  try:
401
  query_embedding = self.embedding_provider.embed_text(query.semantic_query)
402
+ print(f"DEBUG: query_embedding first 5 values: {query_embedding[:5] if query_embedding else 'None'}", file=sys.stderr)
403
  except OSError:
404
  return results
405
 
 
501
  embeddings_list.append(doc_data["embedding"])
502
  doc_ids.append(doc_id)
503
 
504
+ # DEBUG - check first few embeddings
505
+ if embeddings_list:
506
+ import sys
507
+ print(f"DEBUG: First document embedding first 5: {embeddings_list[0][:5] if len(embeddings_list) > 0 else 'None'}", file=sys.stderr)
508
+ if len(embeddings_list) > 1:
509
+ print(f"DEBUG: Second document embedding first 5: {embeddings_list[1][:5] if len(embeddings_list) > 1 else 'None'}", file=sys.stderr)
510
+ same = embeddings_list[0][:10] == embeddings_list[1][:10] # Compare first 10 values
511
+ print(f"DEBUG: First two embeddings identical? {same}", file=sys.stderr)
512
+
513
  if not embeddings_list:
514
  return self._search_context_store_keyword(query)
515