Spaces:

hiteshwar21
/

MANIT-RAG-Chatbot

Sleeping

App Files Files Community

MANIT-RAG-Chatbot / app.py

hiteshwar21

Update app.py

6b267c3 verified 3 months ago

raw

history blame contribute delete

9.4 kB

	#!/usr/bin/env python3
	"""Final Optimized MANIT RAG Chatbot"""
	from typing import List, Dict
	import gradio as gr
	import numpy as np
	import faiss
	import pickle
	import os
	import time
	from sentence_transformers import SentenceTransformer
	from src.retrieval.semantic_retriever import OptimizedSemanticRetriever # Updated import
	from src.generation.response_generator import ResponseGenerator
	from config.settings import config

	num_cores = os.cpu_count()
	print(f"Number of CPU cores: {num_cores}")


	class OptimizedMANITChatbot:
	"""Performance optimized chatbot class"""

	def __init__(self):
	self.initialized = False
	self.initialization_status = "Starting initialization..."
	self.setup_components()

	def setup_components(self):
	"""Initialize components with performance monitoring"""
	try:
	print("=== MANIT Chatbot Initialization ===")

	self.initialization_status = "Loading vector store files..."
	load_start = time.time()

	# Load vector store components
	self.embeddings = np.load(os.path.join(config.VECTOR_STORE_PATH, "embeddings.npy"))
	self.faiss_index = faiss.read_index(os.path.join(config.VECTOR_STORE_PATH, "faiss_index.bin"))

	with open(os.path.join(config.VECTOR_STORE_PATH, "chunks.pkl"), "rb") as f:
	self.chunks = pickle.load(f)

	with open(os.path.join(config.VECTOR_STORE_PATH, "bm25.pkl"), "rb") as f:
	self.bm25 = pickle.load(f)

	with open(os.path.join(config.VECTOR_STORE_PATH, "relationships.pkl"), "rb") as f:
	self.relationships = pickle.load(f)

	load_time = time.time() - load_start
	print(f"Vector store loaded in {load_time:.2f}s")

	self.initialization_status = "Loading embedding model..."
	model_start = time.time()

	# Initialize embedding model
	self.embedding_model = SentenceTransformer(config.EMBEDDING_MODEL, device='cpu')

	model_time = time.time() - model_start
	print(f"Embedding model loaded in {model_time:.2f}s")

	self.initialization_status = "Initializing retrieval components..."

	# Initialize optimized retriever
	self.retriever = OptimizedSemanticRetriever(
	embedding_model=self.embedding_model,
	faiss_index=self.faiss_index,
	chunks=self.chunks,
	bm25_index=self.bm25,
	relationships=self.relationships
	)

	# Initialize response generator
	self.generator = ResponseGenerator()

	self.initialization_status = "Warming up system..."
	# Warm up with a test query
	warmup_start = time.time()
	test_chunks = self.retriever.retrieve("test warmup query")
	warmup_time = time.time() - warmup_start
	print(f"System warmup completed in {warmup_time:.2f}s")

	total_time = time.time() - (load_start - warmup_start + load_start)
	self.initialization_status = "Ready!"
	self.initialized = True

	print(f"=== Initialization Complete in {total_time:.2f}s ===")
	print(f"Performance Mode: {config.PERFORMANCE_MODE}")
	print(f"Retrieval K: {config.retrieval_k}")
	print(f"Using Reranker: {config.use_reranker}")

	except Exception as e:
	print(f"Initialization failed: {e}")
	self.initialization_status = f"Error: {str(e)}"

	def process_query_stream(self, query: str):
	"""Stream response with performance monitoring"""
	if not self.initialized:
	yield f"System Error: {self.initialization_status}"
	return

	if not query.strip():
	yield "Please enter a question about MANIT Bhopal."
	return

	try:
	print(f"\n--- Processing Query: {query} ---")
	total_start = time.time()

	# Retrieve relevant documents
	retrieval_start = time.time()
	retrieved_chunks = self.retriever.retrieve(query)
	retrieval_time = time.time() - retrieval_start

	if not retrieved_chunks:
	yield "I couldn't find relevant information about this topic. Please try another question."
	return

	print(f"Retrieved {len(retrieved_chunks)} chunks in {retrieval_time:.2f}s")

	# Format context
	context = self._format_context(retrieved_chunks)

	# Check if web search is needed
	web_context = ""
	if self.generator.needs_web_search(query, context):
	web_search_start = time.time()
	web_results = self.generator.web_search(query)
	web_search_time = time.time() - web_search_start
	print(f"Web search completed in {web_search_time:.2f}s")

	if web_results:
	web_context = "\n\n".join(web_results)

	# Stream the response
	generation_start = time.time()
	response_chunks = 0

	for chunk in self.generator.generate_response_stream(query, context, web_context):
	response_chunks += 1
	yield chunk

	generation_time = time.time() - generation_start
	total_time = time.time() - total_start

	print(f"Response generated in {generation_time:.2f}s ({response_chunks} chunks)")
	print(f"Total query time: {total_time:.2f}s")

	except Exception as e:
	print(f"Error processing query: {e}")
	yield "I encountered an error processing your question. Please try again."

	def _format_context(self, chunks: List[Dict]) -> str:
	"""Format context for the prompt"""
	context_parts = []

	for chunk in chunks:
	source = chunk['metadata']['source']
	content = chunk['content']
	context_parts.append(f"Source: {source}\nContent: {content}")

	return "\n\n---\n\n".join(context_parts)

	def create_interface():
	"""Create performance optimized Gradio interface"""

	print("Initializing MANIT Chatbot Interface...")
	chatbot_instance = OptimizedMANITChatbot()

	def chat_fn(message, history):
	"""Optimized chat function with better error handling"""
	if not chatbot_instance.initialized:
	error_msg = f"⚠️ System Status: {chatbot_instance.initialization_status}"
	history.append([message, error_msg])
	return history, ""

	# Add the user's message to history
	history.append([message, ""])

	try:
	# Stream the response
	for chunk in chatbot_instance.process_query_stream(message):
	history[-1][1] += chunk
	yield history, ""

	except Exception as e:
	print(f"Chat function error: {e}")
	history[-1][1] = "I encountered an error. Please try again."
	yield history, ""

	with gr.Blocks(
	title="MANIT Bhopal Expert Assistant - Optimized",
	theme=gr.themes.Soft(),
	) as demo:

	gr.HTML(f"""
	<div style="text-align: center; margin-bottom: 20px;">
	<h1>🎓 MANIT Bhopal Assistant</h1>
	<p>Ask questions about programs, admissions, faculty, facilities, research, and more.</p>
	</div>
	""")

	chatbot_ui = gr.Chatbot(
	height=500,
	show_label=False,
	avatar_images=[None, "🎓"],
	show_copy_button=True,
	placeholder="Hi! I'm your assistant."
	)

	with gr.Row():
	msg = gr.Textbox(
	label="Your Question",
	placeholder="Ask about MANIT Bhopal...",
	scale=8,
	lines=2
	)
	submit = gr.Button("Send", scale=1, variant="primary")

	gr.Examples(
	examples=[
	"Who is the director of MANIT?",
	"What are the dispensary timings?",
	"Tell me about the computer science department",
	"What research facilities are available?",
	"What are the guest house prices?"
	],
	inputs=msg,
	label="Example Questions"
	)

	gr.HTML("""
	<div class="performance-info" style="text-align: center; margin-top: 10px;">
	<p>Optimized for faster response times while maintaining accuracy</p>
	</div>
	""")

	# Event handlers
	msg.submit(chat_fn, [msg, chatbot_ui], [chatbot_ui, msg])
	submit.click(chat_fn, [msg, chatbot_ui], [chatbot_ui, msg])

	return demo

	if __name__ == "__main__":
	demo = create_interface()
	demo.launch(
	server_name="0.0.0.0",
	server_port=7860,
	share=False
	)