hiteshwar21's picture
Update app.py
6b267c3 verified
#!/usr/bin/env python3
"""Final Optimized MANIT RAG Chatbot"""
from typing import List, Dict
import gradio as gr
import numpy as np
import faiss
import pickle
import os
import time
from sentence_transformers import SentenceTransformer
from src.retrieval.semantic_retriever import OptimizedSemanticRetriever # Updated import
from src.generation.response_generator import ResponseGenerator
from config.settings import config
num_cores = os.cpu_count()
print(f"Number of CPU cores: {num_cores}")
class OptimizedMANITChatbot:
"""Performance optimized chatbot class"""
def __init__(self):
self.initialized = False
self.initialization_status = "Starting initialization..."
self.setup_components()
def setup_components(self):
"""Initialize components with performance monitoring"""
try:
print("=== MANIT Chatbot Initialization ===")
self.initialization_status = "Loading vector store files..."
load_start = time.time()
# Load vector store components
self.embeddings = np.load(os.path.join(config.VECTOR_STORE_PATH, "embeddings.npy"))
self.faiss_index = faiss.read_index(os.path.join(config.VECTOR_STORE_PATH, "faiss_index.bin"))
with open(os.path.join(config.VECTOR_STORE_PATH, "chunks.pkl"), "rb") as f:
self.chunks = pickle.load(f)
with open(os.path.join(config.VECTOR_STORE_PATH, "bm25.pkl"), "rb") as f:
self.bm25 = pickle.load(f)
with open(os.path.join(config.VECTOR_STORE_PATH, "relationships.pkl"), "rb") as f:
self.relationships = pickle.load(f)
load_time = time.time() - load_start
print(f"Vector store loaded in {load_time:.2f}s")
self.initialization_status = "Loading embedding model..."
model_start = time.time()
# Initialize embedding model
self.embedding_model = SentenceTransformer(config.EMBEDDING_MODEL, device='cpu')
model_time = time.time() - model_start
print(f"Embedding model loaded in {model_time:.2f}s")
self.initialization_status = "Initializing retrieval components..."
# Initialize optimized retriever
self.retriever = OptimizedSemanticRetriever(
embedding_model=self.embedding_model,
faiss_index=self.faiss_index,
chunks=self.chunks,
bm25_index=self.bm25,
relationships=self.relationships
)
# Initialize response generator
self.generator = ResponseGenerator()
self.initialization_status = "Warming up system..."
# Warm up with a test query
warmup_start = time.time()
test_chunks = self.retriever.retrieve("test warmup query")
warmup_time = time.time() - warmup_start
print(f"System warmup completed in {warmup_time:.2f}s")
total_time = time.time() - (load_start - warmup_start + load_start)
self.initialization_status = "Ready!"
self.initialized = True
print(f"=== Initialization Complete in {total_time:.2f}s ===")
print(f"Performance Mode: {config.PERFORMANCE_MODE}")
print(f"Retrieval K: {config.retrieval_k}")
print(f"Using Reranker: {config.use_reranker}")
except Exception as e:
print(f"Initialization failed: {e}")
self.initialization_status = f"Error: {str(e)}"
def process_query_stream(self, query: str):
"""Stream response with performance monitoring"""
if not self.initialized:
yield f"System Error: {self.initialization_status}"
return
if not query.strip():
yield "Please enter a question about MANIT Bhopal."
return
try:
print(f"\n--- Processing Query: {query} ---")
total_start = time.time()
# Retrieve relevant documents
retrieval_start = time.time()
retrieved_chunks = self.retriever.retrieve(query)
retrieval_time = time.time() - retrieval_start
if not retrieved_chunks:
yield "I couldn't find relevant information about this topic. Please try another question."
return
print(f"Retrieved {len(retrieved_chunks)} chunks in {retrieval_time:.2f}s")
# Format context
context = self._format_context(retrieved_chunks)
# Check if web search is needed
web_context = ""
if self.generator.needs_web_search(query, context):
web_search_start = time.time()
web_results = self.generator.web_search(query)
web_search_time = time.time() - web_search_start
print(f"Web search completed in {web_search_time:.2f}s")
if web_results:
web_context = "\n\n".join(web_results)
# Stream the response
generation_start = time.time()
response_chunks = 0
for chunk in self.generator.generate_response_stream(query, context, web_context):
response_chunks += 1
yield chunk
generation_time = time.time() - generation_start
total_time = time.time() - total_start
print(f"Response generated in {generation_time:.2f}s ({response_chunks} chunks)")
print(f"Total query time: {total_time:.2f}s")
except Exception as e:
print(f"Error processing query: {e}")
yield "I encountered an error processing your question. Please try again."
def _format_context(self, chunks: List[Dict]) -> str:
"""Format context for the prompt"""
context_parts = []
for chunk in chunks:
source = chunk['metadata']['source']
content = chunk['content']
context_parts.append(f"Source: {source}\nContent: {content}")
return "\n\n---\n\n".join(context_parts)
def create_interface():
"""Create performance optimized Gradio interface"""
print("Initializing MANIT Chatbot Interface...")
chatbot_instance = OptimizedMANITChatbot()
def chat_fn(message, history):
"""Optimized chat function with better error handling"""
if not chatbot_instance.initialized:
error_msg = f"⚠️ System Status: {chatbot_instance.initialization_status}"
history.append([message, error_msg])
return history, ""
# Add the user's message to history
history.append([message, ""])
try:
# Stream the response
for chunk in chatbot_instance.process_query_stream(message):
history[-1][1] += chunk
yield history, ""
except Exception as e:
print(f"Chat function error: {e}")
history[-1][1] = "I encountered an error. Please try again."
yield history, ""
with gr.Blocks(
title="MANIT Bhopal Expert Assistant - Optimized",
theme=gr.themes.Soft(),
) as demo:
gr.HTML(f"""
<div style="text-align: center; margin-bottom: 20px;">
<h1>πŸŽ“ MANIT Bhopal Assistant</h1>
<p>Ask questions about programs, admissions, faculty, facilities, research, and more.</p>
</div>
""")
chatbot_ui = gr.Chatbot(
height=500,
show_label=False,
avatar_images=[None, "πŸŽ“"],
show_copy_button=True,
placeholder="Hi! I'm your assistant."
)
with gr.Row():
msg = gr.Textbox(
label="Your Question",
placeholder="Ask about MANIT Bhopal...",
scale=8,
lines=2
)
submit = gr.Button("Send", scale=1, variant="primary")
gr.Examples(
examples=[
"Who is the director of MANIT?",
"What are the dispensary timings?",
"Tell me about the computer science department",
"What research facilities are available?",
"What are the guest house prices?"
],
inputs=msg,
label="Example Questions"
)
gr.HTML("""
<div class="performance-info" style="text-align: center; margin-top: 10px;">
<p>Optimized for faster response times while maintaining accuracy</p>
</div>
""")
# Event handlers
msg.submit(chat_fn, [msg, chatbot_ui], [chatbot_ui, msg])
submit.click(chat_fn, [msg, chatbot_ui], [chatbot_ui, msg])
return demo
if __name__ == "__main__":
demo = create_interface()
demo.launch(
server_name="0.0.0.0",
server_port=7860,
share=False
)