minhhungg commited on
Commit
db40891
·
1 Parent(s): 4f1ce14

Adding program files

Browse files
Files changed (4) hide show
  1. agents.py +301 -0
  2. mcp_server.py +609 -0
  3. requirements.txt +8 -0
  4. utils.py +207 -0
agents.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict, Any, Optional
3
+ import google.generativeai as genai
4
+ from openai import OpenAI
5
+ import logging
6
+ import json
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class BaseAgent:
    """Shared LLM access layer for all agents.

    Dispatches text generation either to Google Gemini or to OpenRouter's
    OpenAI-compatible endpoint, depending on ``use_gemini``.
    """

    def __init__(self, use_gemini: bool = True, api_key: Optional[str] = None,
                 openrouter_model: Optional[str] = None, gemini_model: Optional[str] = None):
        self.use_gemini = use_gemini
        if not use_gemini:
            # OpenRouter speaks the OpenAI chat-completions protocol.
            self.openrouter_client = OpenAI(
                base_url="https://openrouter.ai/api/v1",
                api_key=api_key
            )
            self.model = openrouter_model or "anthropic/claude-3-opus:beta"
        else:
            if not api_key:
                raise ValueError("Gemini API key is required when use_gemini=True")
            genai.configure(api_key=api_key)
            self.gemini_model = gemini_model or "gemini-1.5-pro"  # Use a good default model

    def _generate_with_gemini(self, prompt: str, system_prompt: str) -> str:
        """Generate with Gemini, folding the system prompt into a single user turn."""
        try:
            # Gemini has no separate system role here, so prepend it explicitly.
            combined_prompt = f"System: {system_prompt}\n\nUser: {prompt}"
            gemini = genai.GenerativeModel(model_name=self.gemini_model)
            config = genai.types.GenerationConfig(temperature=0.1)
            result = gemini.generate_content(combined_prompt, generation_config=config)
            return result.text
        except Exception as e:
            logger.error(f"Gemini generation failed: {str(e)}")
            raise

    def _generate_with_openrouter(self, prompt: str, system_prompt: str) -> str:
        """Generate via OpenRouter's chat-completions endpoint."""
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ]
        completion = self.openrouter_client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=0.1,
        )
        return completion.choices[0].message.content

    def generate(self, prompt: str, system_prompt: str) -> str:
        """Route generation to the configured backend; log and re-raise failures."""
        try:
            if self.use_gemini:
                return self._generate_with_gemini(prompt, system_prompt)
            return self._generate_with_openrouter(prompt, system_prompt)
        except Exception as e:
            logger.error(f"Generation failed: {str(e)}")
            raise
62
+
63
class OrchestratorAgent(BaseAgent):
    """Plans the overall research effort and judges when it is complete."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.system_prompt = """You are an expert research planner that develops comprehensive research strategies.
Your role is to create structured research plans that identify what information is needed and why.
Focus on the logical flow of information needed to answer the query comprehensively."""

    def create_research_plan(self, query: str) -> Dict[str, List[str]]:
        """Create a structured research plan with clear objectives.

        Returns a dict with ``core_concepts``, ``key_questions``,
        ``information_requirements`` and ``research_priorities`` lists.
        Falls back to a degenerate plan built from the raw query when the
        model response cannot be parsed.
        """
        prompt = f"""Create a detailed research plan for the following query: {query}

Return a JSON object with the following structure:
{{
    "core_concepts": ["list of fundamental concepts that need to be understood"],
    "key_questions": ["specific questions that need to be answered"],
    "information_requirements": ["specific pieces of information needed to answer each question"],
    "research_priorities": ["ordered list of research priorities"]
}}

Make sure the plan flows logically and each item contributes to answering the main query."""

        response = self.generate(prompt, self.system_prompt)
        try:
            # Clean the response of any markdown formatting
            cleaned_response = response.strip().replace('```json', '').replace('```', '').strip()
            plan = json.loads(cleaned_response)
            # Guard against the model returning valid JSON of the wrong shape.
            if not isinstance(plan, dict):
                raise ValueError("research plan is not a JSON object")
            logger.info(f"Generated research plan: {json.dumps(plan, indent=2)}")
            return plan
        # Was a bare `except:` — narrowed so KeyboardInterrupt/SystemExit propagate.
        except (json.JSONDecodeError, ValueError):
            logger.error(f"Failed to parse research plan: {response}")
            # Return a basic plan structure if parsing fails
            return {
                "core_concepts": [query],
                "key_questions": [query],
                "information_requirements": [query],
                "research_priorities": [query]
            }

    def evaluate_research_progress(self, plan: Dict[str, List[str]], gathered_info: List[str]) -> Dict[str, bool]:
        """Evaluate if we have enough information for each aspect of the plan.

        Always returns a dict with exactly the three boolean keys
        ``core_concepts``, ``key_questions`` and ``information_requirements``;
        any parse/validation failure yields all-False (i.e. "keep researching").
        """
        prompt = f"""Analyze the research plan and gathered information to evaluate completeness.

Research Plan:
{json.dumps(plan, indent=2)}

Gathered Information:
{chr(10).join(gathered_info)}

Your task: Return a STRICTLY FORMATTED JSON object with only three boolean fields indicating whether the gathered information adequately covers each aspect. Do not include any other text, explanation, or comments.

Required exact output format (with true/false values):
{{
    "core_concepts": false,
    "key_questions": false,
    "information_requirements": false
}}

Rules:
- Set a field to true ONLY if the gathered information thoroughly covers that aspect
- Return ONLY the JSON object, no other text
- Must be valid JSON parseable by json.loads()"""

        response = self.generate(prompt, self.system_prompt)
        try:
            # Remove any leading/trailing whitespace and quotes
            cleaned_response = response.strip().strip('"').strip()
            # Remove any markdown code block formatting
            cleaned_response = cleaned_response.replace('```json', '').replace('```', '').strip()

            # Parse and validate the response has the correct structure
            parsed = json.loads(cleaned_response)
            required_keys = {"core_concepts", "key_questions", "information_requirements"}
            if not all(isinstance(parsed.get(key), bool) for key in required_keys):
                raise ValueError("Response missing required boolean fields")

            return parsed
        except Exception as e:
            # Include the exception so the failure mode is visible in logs.
            logger.error(f"Failed to parse evaluation response: {response} ({e})")
            # Default to "nothing complete" so the caller keeps researching.
            return {
                "core_concepts": False,
                "key_questions": False,
                "information_requirements": False
            }
147
+
148
class PlannerAgent(BaseAgent):
    """Turns outstanding research needs into targeted web-search queries."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.system_prompt = """You are an expert research planner that creates targeted search strategies.
Your role is to identify the key aspects that need deep investigation, focusing on quality over quantity.
Create research plans that encourage thorough exploration of important concepts rather than shallow coverage of many topics."""

    def create_search_strategy(self, research_item: str, item_type: str) -> List[str]:
        """Create up to 3 targeted search queries for one research item.

        Falls back to the raw research item as a single query when the model
        response cannot be parsed as a JSON array.
        """
        prompt = f"""Create 2-3 highly specific search queries for this {item_type}: {research_item}

Focus on Depth:
- Start with foundational understanding
- Build up to technical specifics and implementation details
- Look for real-world examples and case studies
- Find comparative analyses and benchmarks
- Seek out critical discussions and limitations

Guidelines:
- Prefer fewer, more focused queries over many broad ones
- Each query should build on previous knowledge
- Target high-quality technical sources
- Look for detailed explanations rather than surface-level overviews

Return ONLY a JSON array of 2-3 carefully crafted search queries that will yield deep technical information.
Make each query highly specific and targeted."""

        response = self.generate(prompt, self.system_prompt)
        try:
            cleaned_response = response.strip().replace('```json', '').replace('```', '').strip()
            queries = json.loads(cleaned_response)
            # TypeError below also catches a non-sliceable JSON payload (e.g. an object).
            return [str(q) for q in queries[:3]]
        # Was a bare `except:` — narrowed so KeyboardInterrupt/SystemExit propagate.
        except (json.JSONDecodeError, TypeError):
            logger.error(f"Failed to parse search queries: {response}")
            return [str(research_item)]

    def prioritize_unfulfilled_requirements(self, plan: Dict[str, List[str]], progress: Dict[str, bool],
                                            gathered_info: Optional[List[str]] = None) -> List[tuple]:
        """Create a prioritized list of remaining research needs with depth checking.

        Returns ``(aspect, item)`` tuples ordered core_concepts ->
        key_questions -> information_requirements, skipping any item already
        covered in depth by ``gathered_info``. Missing plan/progress keys are
        treated as "not yet covered" rather than raising KeyError.
        """
        items = []

        def has_sufficient_depth(topic: str, info: List[str]) -> bool:
            """True when >=2 texts mention >=2 topic keywords and exceed 300 chars."""
            if not info:
                return False

            # Hoisted out of the loop: the keyword set is invariant per topic.
            topic_words = set(topic.lower().split())

            # Count substantial mentions (more than just a passing reference)
            substantial_mentions = 0
            for text in info:
                text_lower = text.lower()
                # Check if the text contains multiple topic keywords
                keyword_matches = sum(1 for word in topic_words if word in text_lower)
                # Check for substantial content (contains multiple keywords and is detailed)
                if keyword_matches >= 2 and len(text) > 300:
                    substantial_mentions += 1

            # Require multiple substantial mentions
            return substantial_mentions >= 2

        # Priority order: core concepts first, then key questions, then
        # detailed information requirements.
        for aspect in ("core_concepts", "key_questions", "information_requirements"):
            if progress.get(aspect, False):
                continue  # this aspect was already judged complete
            for item in plan.get(aspect, []):
                if not gathered_info or not has_sufficient_depth(item, gathered_info):
                    items.append((aspect, item))

        return items
227
+
228
class ReportAgent(BaseAgent):
    """Synthesizes gathered research findings into a final markdown report.

    The heavy lifting is done by the LLM; this class only assembles the
    report-writing prompt and delegates to BaseAgent.generate.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # System prompt defining the report-writer persona and its principles.
        self.system_prompt = """You are an expert technical writer and researcher that creates
comprehensive, well-structured research reports. Your primary focus is on deep analysis,
synthesis of information, and meaningful organization of content.

Key Principles:
1. Quality over Quantity - Focus on depth and insight rather than filling sections
2. Natural Organization - Let the content guide the structure instead of forcing a rigid outline
3. Meaningful Connections - Draw relationships between different pieces of information
4. Critical Analysis - Question assumptions and evaluate trade-offs
5. Evidence-Based - Support claims with specific technical details and examples"""

    def generate_report(self, query: str, research_plan: Dict[str, List[str]],
                        research_results: List[str], completion_stats: Dict[str, Any]) -> str:
        """Produce the final markdown report for *query*.

        Args:
            query: The original research question.
            research_plan: Plan dict produced by OrchestratorAgent.create_research_plan.
            research_results: Formatted source contexts gathered during research.
            completion_stats: Search/coverage statistics embedded in the prompt
                so the model knows how thorough the research was.

        Returns:
            The model's markdown report text (no sources section appended here;
            the caller is responsible for that).
        """
        # The entire report structure/content policy lives in this prompt;
        # the return value is whatever markdown the model emits.
        prompt = f"""Generate a comprehensive technical report that synthesizes the research findings into a cohesive narrative.

Query: {query}

Research Plan:
{json.dumps(research_plan, indent=2)}

Research Coverage:
{json.dumps(completion_stats, indent=2)}

Research Findings:
{chr(10).join(research_results)}

Report Requirements:

1. Organization:
- Start with a clear introduction that frames the topic
- Group related concepts together naturally
- Only create sections when there's enough substantial content
- Use appropriate heading levels (# for h1, ## for h2, etc.)
- Maintain a logical flow of ideas

2. Content Development:
- Focus on in-depth analysis of important concepts
- Provide concrete examples and technical details
- Compare and contrast different approaches
- Discuss real-world implications
- Acknowledge limitations and trade-offs

3. Synthesis & Analysis:
- Draw meaningful connections between different sources
- Evaluate conflicting information
- Identify patterns and trends
- Provide reasoned analysis supported by evidence
- Offer insights beyond just summarizing sources

4. Technical Accuracy:
- Use precise technical language
- Include relevant code examples with language tags
- Provide performance metrics when available
- Explain technical concepts clearly
- Support technical claims with evidence

5. Formatting:
- Use proper markdown formatting
- Include code blocks with language tags when relevant
- Format lists and tables appropriately
- Add line breaks between sections
- Ensure consistent formatting throughout

Important:
- Do NOT create sections just to fill a structure
- Combine related information even if it came from different parts of the research plan
- Focus on providing meaningful insights rather than covering every possible aspect
- Only include information that contributes to understanding the topic
- Skip sections or topics where there isn't enough substantive content"""

        return self.generate(prompt, self.system_prompt)
mcp_server.py ADDED
@@ -0,0 +1,609 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import logging
4
+ from datetime import datetime
5
+ import gradio as gr
6
+ import google.generativeai as genai
7
+ from tavily import TavilyClient
8
+ from dotenv import load_dotenv
9
+ from logger_config import setup_logging
10
+ from typing import List, Dict, Any, Optional
11
+ from utils import (
12
+ validate_response,
13
+ parse_research_results,
14
+ format_sources_section,
15
+ save_markdown_report,
16
+ convert_to_html
17
+ )
18
+ # Base server class for MCP
19
class MCPServer:
    """Minimal abstract base class for MCP research servers.

    Concrete servers must implement both request processing and the Gradio
    UI factory; the base only tracks whether test mode is active.
    """

    def __init__(self):
        # When True, subclasses should serve canned responses instead of
        # spending real API credits.
        self.test_mode = False

    def process_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
        """Process a research request"""
        raise NotImplementedError("Subclasses must implement process_request")

    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface"""
        raise NotImplementedError("Subclasses must implement create_interface")
30
+
31
+ from agents import OrchestratorAgent, PlannerAgent, ReportAgent
32
+
33
+ # Set up logging
34
+ loggers = setup_logging()
35
+ server_logger = loggers['server']
36
+
37
class MultiAgentSystem:
    """Coordinates the orchestrator, planner and report agents plus Tavily search.

    The research loop: plan -> evaluate coverage -> search for gaps -> repeat,
    bounded by hard limits on total searches and per-item attempts, then a
    final report is generated from everything gathered.
    """

    def __init__(self, use_gemini=True, gemini_api_key=None, gemini_model=None,
                 tavily_api_key=None, openrouter_api_key=None, openrouter_model=None):
        self.use_gemini = use_gemini
        self.gemini_api_key = gemini_api_key
        self.gemini_model = gemini_model
        self.tavily_api_key = tavily_api_key
        self.openrouter_api_key = openrouter_api_key
        self.openrouter_model = openrouter_model

        # All three agents share the same backend configuration.
        agent_kwargs = dict(
            use_gemini=use_gemini,
            api_key=gemini_api_key if use_gemini else openrouter_api_key,
            openrouter_model=openrouter_model,
            gemini_model=gemini_model
        )
        self.orchestrator = OrchestratorAgent(**agent_kwargs)
        self.planner = PlannerAgent(**agent_kwargs)
        self.report_agent = ReportAgent(**agent_kwargs)

        # Tavily client is optional at construction; web_search raises if absent.
        self.tavily_client = TavilyClient(api_key=tavily_api_key) if tavily_api_key else None

    def web_search(self, query: str) -> List[Dict[str, str]]:
        """Perform web search using Tavily.

        Returns Tavily's ``results`` list (possibly empty).

        Raises:
            ValueError: if no Tavily API key was configured.
            Exception: any Tavily/network error is logged and re-raised.
        """
        if not self.tavily_client:
            raise ValueError("Tavily API key not provided")

        try:
            response = self.tavily_client.search(
                query,
                search_depth="advanced",  # Only 'basic' or 'advanced' are allowed
                max_results=5,  # Limit results to keep responses focused
                # NOTE(review): `async_search`/`timeout` are passed through to the
                # Tavily client — confirm these kwargs are supported by the
                # installed tavily-python version.
                async_search=True,
                timeout=30
            )
            return response.get('results', [])
        except Exception as e:
            server_logger.error(f"Web search failed: {str(e)}")
            raise  # Re-raise the exception to handle it in the calling code

    def process_query(self, query: str) -> str:
        """Process a research query using the multi-agent system.

        Returns the final markdown report (including a sources section).
        """
        try:
            # Step 1: Create a structured research plan
            server_logger.info("Creating research plan...")
            research_plan = self.orchestrator.create_research_plan(query)
            server_logger.info(f"Generated research plan: {json.dumps(research_plan, indent=2)}")

            # Step 2: Initialize research process
            all_search_results = []
            MAX_SEARCHES_TOTAL = 30   # Total search limit
            MIN_RESULTS_PER_ITEM = 3  # Minimum results before checking progress
            MAX_ATTEMPTS_PER_ITEM = 2  # Maximum attempts to research each item
            search_count = 0
            seen_urls = set()  # Track seen URLs to avoid duplicates

            # Track research attempts for each item to prevent loops
            research_attempts = {}

            # Fix: pre-initialize progress so the completion stats below can
            # never hit a NameError, even if the loop body is never reached.
            progress = {
                "core_concepts": False,
                "key_questions": False,
                "information_requirements": False
            }

            # Step 3: Conduct research until coverage is complete or budget spent
            while search_count < MAX_SEARCHES_TOTAL:
                # Evaluate current progress
                current_results = [r['content'] for r in all_search_results]
                progress = self.orchestrator.evaluate_research_progress(research_plan, current_results)

                # Check if we have completed all aspects
                if all(progress.values()):
                    server_logger.info("Research complete - all aspects covered with sufficient depth")
                    break

                # Get prioritized list of unfulfilled research needs
                remaining_items = self.planner.prioritize_unfulfilled_requirements(
                    research_plan,
                    progress,
                    current_results
                )

                if not remaining_items:
                    break

                # Research each remaining item
                for item_type, research_item in remaining_items:
                    # Check if we've exceeded attempts for this item
                    item_key = f"{item_type}:{research_item}"
                    if research_attempts.get(item_key, 0) >= MAX_ATTEMPTS_PER_ITEM:
                        server_logger.info(f"Reached maximum attempts for {item_key}")
                        continue

                    if search_count >= MAX_SEARCHES_TOTAL:
                        server_logger.info(f"Reached maximum total searches ({MAX_SEARCHES_TOTAL})")
                        break

                    server_logger.info(f"Researching {item_type}: {research_item}")
                    search_queries = self.planner.create_search_strategy(research_item, item_type)

                    # Track this research attempt
                    research_attempts[item_key] = research_attempts.get(item_key, 0) + 1

                    # Hoisted: the relevance keyword list is invariant per item.
                    item_keywords = research_item.lower().split()

                    # Conduct searches for this item
                    item_results = []
                    for search_query in search_queries:
                        if search_count >= MAX_SEARCHES_TOTAL:
                            break

                        # Ensure search query is a simple string
                        query_str = str(search_query).strip()
                        if not query_str:
                            continue

                        server_logger.info(f"Searching for: {query_str}")
                        results = self.web_search(query_str)

                        # Deduplicate and filter results
                        new_results = []
                        for result in results:
                            url = result.get('url')
                            content = result.get('content', '').strip()

                            # Skip if URL seen or content too short
                            if not url or url in seen_urls or len(content) < 100:
                                continue

                            # Keep only results relevant to the research item
                            # (lowercase once per result, not once per keyword).
                            content_lower = content.lower()
                            if any(keyword in content_lower for keyword in item_keywords):
                                seen_urls.add(url)
                                new_results.append(result)

                        item_results.extend(new_results)
                        search_count += 1

                        # Stop early once we have enough detailed results for this item
                        if len(item_results) >= MIN_RESULTS_PER_ITEM and all(
                            len(r.get('content', '')) > 200 for r in item_results
                        ):
                            break

                    all_search_results.extend(item_results)

            # Step 4: Generate final report
            server_logger.info("Generating final report...")
            contexts, sources = parse_research_results(all_search_results)

            # Add research completion statistics
            completion_stats = {
                "total_searches": search_count,
                "unique_sources": len(seen_urls),
                "research_coverage": dict(progress)
            }
            server_logger.info(f"Research stats: {json.dumps(completion_stats, indent=2)}")

            report = self.report_agent.generate_report(
                query=query,
                research_plan=research_plan,
                research_results=contexts,
                completion_stats=completion_stats
            )

            # Add sources section to the report
            report += "\n\n" + format_sources_section(sources)

            return report

        except Exception as e:
            server_logger.error(f"Error in process_query: {str(e)}", exc_info=True)
            raise
217
+
218
+ # Global UI component for progress tracking
219
+ progress_output = None
220
+
221
def create_interface():
    """Create the Gradio interface with API key inputs.

    Builds the full research UI: progress log, API-type selector with
    conditionally-visible key fields, query box, and report/download outputs.

    Returns:
        The assembled ``gr.Blocks`` interface (not yet launched).
    """
    global progress_output

    css = """
    .log-container {
        margin: 16px 0;
    }
    .log-output {
        font-family: monospace;
        white-space: pre !important;
        height: 300px;
        overflow-y: auto;
        background-color: #1e1e1e !important;
        color: #d4d4d4 !important;
        padding: 10px;
        border-radius: 4px;
    }
    .research-progress {
        position: relative;
    }
    .minimize-btn {
        position: absolute;
        right: 10px;
        top: 10px;
    }
    """

    with gr.Blocks(title="Multi-Agent Research System", css=css) as interface:
        gr.Markdown(
            """# Multi-Agent Research System

This system uses multiple AI agents to perform comprehensive research and analysis.
Please provide your API keys to begin."""
        )

        # Progress tracking container with minimize button
        with gr.Row(elem_classes="log-container"):
            with gr.Column(elem_classes="research-progress"):
                progress_output = gr.Textbox(
                    value="Waiting to begin research...",
                    elem_classes=["log-output"],
                    show_label=False,
                    lines=10,
                    max_lines=20,
                    interactive=False
                )
                minimize_btn = gr.Button("🔽", elem_classes="minimize-btn")

        with gr.Row():
            api_type = gr.Radio(
                choices=["Gemini", "OpenRouter"],
                label="Choose API Type",
                value="Gemini",
                info="Select which API to use for the agents"
            )

        with gr.Row():
            with gr.Column():
                gemini_key = gr.Textbox(
                    label="Gemini API Key",
                    placeholder="Enter your Gemini API key",
                    type="password"
                )
                gemini_model = gr.Dropdown(
                    label="Gemini Model",
                    choices=[
                        "gemini-2.0-flash",
                        "gemini-2.0-flash-lite",
                        "gemini-1.5-pro",
                        "gemini-2.5-pro-preview-05-06",
                        "gemini-2.5-flash-preview-04-17"
                    ],
                    value="gemini-2.0-flash",
                    info="Choose Gemini model version"
                )
            with gr.Column():
                tavily_key = gr.Textbox(
                    label="Tavily API Key (Required)",
                    placeholder="Enter your Tavily API key",
                    type="password"
                )

        with gr.Row():
            with gr.Column():
                openrouter_key = gr.Textbox(
                    label="OpenRouter API Key",
                    placeholder="Enter your OpenRouter API key",
                    type="password",
                    visible=False
                )
                openrouter_model = gr.Textbox(
                    label="OpenRouter Model ID",
                    placeholder="e.g., anthropic/claude-3-opus:beta",
                    info="Enter any valid OpenRouter model ID",
                    value="anthropic/claude-3-opus:beta",
                    visible=False
                )

        query_input = gr.Textbox(
            label="Research Query",
            placeholder="Enter your research question...",
            lines=3,
            info="Enter a detailed research question or topic to investigate"
        )

        submit_btn = gr.Button("Begin Research", variant="primary")

        with gr.Row():
            output = gr.Markdown(label="Research Results")
            download_md = gr.File(label="Download Markdown Report", visible=False)
            download_html = gr.File(label="Download HTML Report", visible=False)

        def update_api_visibility(choice):
            """Show only the key/model inputs for the selected backend."""
            if choice == "Gemini":
                return {
                    gemini_key: gr.update(visible=True),
                    gemini_model: gr.update(visible=True),
                    openrouter_key: gr.update(visible=False),
                    openrouter_model: gr.update(visible=False)
                }
            else:
                return {
                    gemini_key: gr.update(visible=False),
                    gemini_model: gr.update(visible=False),
                    openrouter_key: gr.update(visible=True),
                    openrouter_model: gr.update(visible=True)
                }

        def run_research(query, api_type, gemini_key, gemini_model, tavily_key, openrouter_key, openrouter_model):
            """Validate inputs, run the multi-agent research, return UI updates.

            Always returns a 4-tuple matching the wired outputs:
            (progress log, report markdown, markdown download, HTML download).
            """
            def _error(progress_msg, detail_msg):
                # Fix: early returns previously yielded 2-tuples while the
                # click handler wires 4 outputs, which breaks Gradio.
                return (
                    gr.update(value=progress_msg),
                    detail_msg,
                    gr.update(visible=False),
                    gr.update(visible=False)
                )

            if not tavily_key:
                server_logger.error("Missing Tavily API key")
                return _error("Error: Missing Tavily API key",
                              "Please provide a Tavily API key for web search capability.")

            if api_type == "Gemini" and not gemini_key:
                server_logger.error("Missing Gemini API key")
                return _error("Error: Missing Gemini API key",
                              "Please provide a Gemini API key when using Gemini mode.")

            if api_type == "OpenRouter" and not openrouter_key:
                server_logger.error("Missing OpenRouter API key")
                return _error("Error: Missing OpenRouter API key",
                              "Please provide an OpenRouter API key when using OpenRouter mode.")

            # Capture log records so they can be surfaced in the progress box.
            class LogCaptureHandler(logging.Handler):
                def __init__(self):
                    super().__init__()
                    self.logs = []

                def emit(self, record):
                    # logging ignores emit()'s return value, so just buffer
                    # the formatted line (the old gr.update return was a no-op).
                    self.logs.append(self.format(record))

            log_handler = LogCaptureHandler()
            log_handler.setFormatter(logging.Formatter('%(levelname)s - %(message)s'))
            server_logger.addHandler(log_handler)
            try:
                # Initialize system and run query
                system = MultiAgentSystem(
                    use_gemini=(api_type == "Gemini"),
                    gemini_api_key=gemini_key if api_type == "Gemini" else None,
                    gemini_model=gemini_model if api_type == "Gemini" else None,
                    tavily_api_key=tavily_key,
                    openrouter_api_key=openrouter_key if api_type == "OpenRouter" else None,
                    openrouter_model=openrouter_model if api_type == "OpenRouter" else None
                )

                result = system.process_query(query)

                # Save markdown report and get file path
                md_file_path = save_markdown_report(result)
                html_file_path = convert_to_html(result)

                return (
                    gr.update(value="\n".join(log_handler.logs)),  # Progress output
                    result,  # Markdown output
                    gr.update(value=md_file_path, visible=True),  # Download markdown button
                    gr.update(value=html_file_path, visible=True)  # Download HTML button
                )
            except Exception as e:
                server_logger.error(f"Research failed: {str(e)}", exc_info=True)
                error_msg = f"ERROR: Research failed: {str(e)}"
                return _error(error_msg, error_msg)
            finally:
                # Fix: detach on every path so handlers don't pile up across runs
                # (the original leaked the handler whenever an exception occurred).
                server_logger.removeHandler(log_handler)

        # Connect event handlers
        api_type.change(
            fn=update_api_visibility,
            inputs=[api_type],
            outputs=[gemini_key, gemini_model, openrouter_key, openrouter_model]
        )

        submit_btn.click(
            fn=run_research,
            inputs=[
                query_input, api_type, gemini_key, gemini_model,
                tavily_key, openrouter_key, openrouter_model
            ],
            outputs=[progress_output, output, download_md, download_html],
            show_progress="full"
        )

        gr.Examples(
            examples=[
                ["What are the latest advances in transformer architecture optimizations?"],
                ["Explain the mathematical foundations of diffusion models"],
                ["Compare and analyze different approaches to few-shot learning"]
            ],
            inputs=query_input
        )

    return interface
440
+
441
class GradioMCPServer(MCPServer):
    """MCP server implementation backed by the multi-agent research system."""

    def __init__(self, use_gemini: bool = True,
                 gemini_api_key: Optional[str] = None,
                 gemini_model: Optional[str] = None,
                 tavily_api_key: Optional[str] = None,
                 openrouter_api_key: Optional[str] = None,
                 openrouter_model: Optional[str] = None):
        super().__init__()
        self.test_mode = False

        # Initialize the multi-agent system
        self.agent_system = MultiAgentSystem(
            use_gemini=use_gemini,
            gemini_api_key=gemini_api_key,
            gemini_model=gemini_model,
            tavily_api_key=tavily_api_key,
            openrouter_api_key=openrouter_api_key,
            openrouter_model=openrouter_model
        )

    def process_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
        """Process research requests and return markdown report.

        Expects ``request['query']`` and an optional ``request['format']``
        of 'markdown' (default) or anything else for HTML. Returns a dict
        with ``response``, ``file_path`` and ``status`` keys.
        """
        try:
            query = request.get('query', '')
            output_format = request.get('format', 'markdown')

            if self.test_mode:
                markdown_text = """# Test Mode Response

## Overview
This is a sample report generated in test mode without using API credits.

## Key Findings
1. Test finding one
2. Test finding two

## Test Results
Sample analysis content...
"""
                file_path = save_markdown_report(markdown_text) if output_format == 'markdown' else convert_to_html(markdown_text)
            else:
                # Fix: process_query returns a single string; the original
                # `report, _, _ = ...` unpacked that string and raised ValueError.
                report = self.agent_system.process_query(query)
                file_path = save_markdown_report(report) if output_format == 'markdown' else convert_to_html(report)
                markdown_text = report

            # Return response with markdown content and file path
            return {
                "response": markdown_text,
                "file_path": file_path,
                "status": "success"
            }

        except Exception as e:
            server_logger.error(f"Error processing request: {str(e)}")
            return {
                "response": f"Error: {str(e)}",
                "file_path": None,
                "status": "error"
            }

    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface with markdown preview and file download"""
        with gr.Blocks(title="Research Assistant", theme=gr.themes.Soft()) as interface:
            gr.Markdown("# Research Assistant")

            with gr.Row():
                with gr.Column(scale=3):
                    query_input = gr.Textbox(
                        label="Research Query",
                        placeholder="Enter your research question...",
                        lines=3
                    )
                with gr.Column(scale=1):
                    test_mode_checkbox = gr.Checkbox(
                        label="Test Mode (No API credits used)",
                        value=False
                    )

            submit_btn = gr.Button("Begin Research", variant="primary")

            with gr.Row():
                # Preview panel
                report_output = gr.Markdown(label="Research Results")
                # Download panel
                with gr.Column():
                    gr.Markdown("### Download Options")
                    with gr.Row():
                        download_md = gr.File(label="Download Markdown", visible=False)
                        download_html = gr.File(label="Download HTML", visible=False)

            def process_query(query: str, test_mode: bool):
                """Run the query (or emit canned test output) and return
                (preview markdown, markdown-download update, HTML-download update).

                Note: the original ``-> tuple[str, str, str]`` annotation was
                wrong — two of the returned values are gr.update dicts.
                """
                try:
                    self.test_mode = test_mode
                    if self.test_mode:
                        markdown_text = """# Test Mode Response

## Overview
This is a sample report generated in test mode without using API credits.

## Key Findings
1. Test finding one
2. Test finding two

## Test Results
Sample analysis content..."""
                    else:
                        # Use multi-agent system to process query
                        markdown_text = self.agent_system.process_query(query)

                    # Generate both markdown and HTML files
                    md_path = save_markdown_report(markdown_text)
                    html_path = convert_to_html(markdown_text)

                    # Make download buttons visible and return results
                    return (
                        markdown_text,  # Preview content
                        gr.update(value=md_path, visible=True),  # Markdown download
                        gr.update(value=html_path, visible=True)  # HTML download
                    )

                except Exception as e:
                    server_logger.error(f"Error processing query: {str(e)}")
                    return (
                        f"Error: {str(e)}",  # Error message in preview
                        gr.update(visible=False),  # Hide markdown download
                        gr.update(visible=False)  # Hide HTML download
                    )

            # Connect the button to the processing function
            submit_btn.click(
                fn=process_query,
                inputs=[query_input, test_mode_checkbox],
                outputs=[report_output, download_md, download_html]
            )

            # Add example queries
            gr.Examples(
                examples=[
                    ["What are the latest advances in transformer architecture optimizations?"],
                    ["Explain the mathematical foundations of diffusion models"],
                    ["Compare and analyze different approaches to few-shot learning"]
                ],
                inputs=query_input
            )

        return interface
589
+
590
+ if __name__ == "__main__":
591
+ try:
592
+ # Configure event loop policy for Windows
593
+ if os.name == 'nt': # Windows
594
+ import asyncio
595
+ import sys
596
+ if sys.version_info[0] == 3 and sys.version_info[1] >= 8:
597
+ asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
598
+
599
+ server_logger.info("Starting Gradio server")
600
+ interface = create_interface()
601
+ interface.launch(
602
+ server_name="0.0.0.0",
603
+ share=False,
604
+ debug=True,
605
+ prevent_thread_lock=True, # Allow for proper cleanup
606
+ )
607
+ except Exception as e:
608
+ server_logger.error(f"Failed to start Gradio server: {str(e)}", exc_info=True)
609
+ raise
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
google-generativeai>=0.3.0
openai>=1.0.0
tavily-python>=0.2.0
python-dotenv>=1.0.0
retry>=0.9.2
aiohttp>=3.8.0
tenacity>=8.2.0
tiktoken>=0.5.0
gradio>=4.0.0
markdown-it-py>=3.0.0
utils.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import logging
4
+ from datetime import datetime
5
+ from typing import Dict, Any, Optional, List, Tuple
6
+ from markdown_it import MarkdownIt
7
+
8
def validate_response(response: Any, expected_type: type) -> bool:
    """Validate response type and structure.

    Returns True when *response* is an instance of *expected_type*.
    """
    return isinstance(response, expected_type)
13
+
14
def format_source_content(
    title: str,
    url: str,
    date: str,
    content: str,
    source_type: str
) -> str:
    """Render one research source as a markdown section.

    The section lists the source metadata (title, URL, date, type),
    then the key content, and ends with a horizontal-rule separator.
    """
    # An empty date string is shown as a readable placeholder.
    display_date = date or 'Not available'
    return (
        f"### Source: {title}\n"
        f"URL: {url}\n"
        f"Date: {display_date}\n"
        f"Type: {source_type}\n"
        "\n"
        "**Key Content:**\n"
        f"{content}\n"
        "\n"
        "---"
    )
31
+
32
def parse_research_results(results: List[Dict[str, Any]]) -> Tuple[List[str], List[Dict[str, str]]]:
    """Parse and validate research results.

    Args:
        results: Raw search results; each item may carry "title",
            "content", "url" and "published_date" keys. Keys may be
            absent or explicitly None (common in search-API payloads).

    Returns:
        Tuple of (formatted context strings, source metadata dicts).
        Items lacking both a title and content are skipped.
    """
    contexts: List[str] = []
    sources: List[Dict[str, str]] = []

    for result in results:
        # `or ""` guards against explicit None values: dict.get's default
        # only covers *missing* keys, and None.strip() would raise.
        title = (result.get("title") or "").strip()
        content = (result.get("content") or "").strip()
        url = (result.get("url") or "").strip()
        date = (result.get("published_date") or "").strip()

        if title and content:
            # Heuristic classification: arXiv links and URLs mentioning
            # "paper" are treated as research papers, the rest as articles.
            source_type = (
                "research_paper"
                if "arxiv.org" in url or "paper" in url.lower()
                else "article"
            )

            sources.append({
                "title": title,
                "url": url,
                "date": date if date else "Date not available",
                "type": source_type
            })

            contexts.append(
                format_source_content(title, url, date, content, source_type)
            )

    return contexts, sources
62
+
63
def format_sources_section(sources: List[Dict[str, str]]) -> str:
    """Render the 'Sources Cited' markdown section of a report.

    Sources are grouped into research papers and technical articles;
    each entry becomes a numbered markdown link with its date. When no
    sources exist, a placeholder message is returned instead.
    """
    header = "\n\n## Sources Cited\n\n"

    if not sources:
        return header + "No sources were found during the research phase."

    def numbered_links(entries: List[Dict[str, str]]) -> str:
        # One "N. [title](url) - date" line per entry, 1-based numbering.
        return "".join(
            f"{i}. [{entry['title']}]({entry['url']}) - {entry['date']}\n"
            for i, entry in enumerate(entries, 1)
        )

    papers = [s for s in sources if s['type'] == 'research_paper']
    articles = [s for s in sources if s['type'] == 'article']

    parts = [header]
    if papers:
        parts.append("\n### Research Papers\n")
        parts.append(numbered_links(papers))
    if articles:
        parts.append("\n### Technical Articles & Resources\n")
        parts.append(numbered_links(articles))

    # Trailing newline separates this section from any following content.
    parts.append("\n")
    return "".join(parts)
86
+
87
def save_markdown_report(content: str) -> str:
    """Persist a markdown report under ``generated_reports/``.

    Args:
        content: The markdown text to write.

    Returns:
        str: Path of the newly written ``.md`` file.

    Raises:
        Exception: Re-raised after logging if the write fails.
    """
    try:
        output_dir = "generated_reports"
        os.makedirs(output_dir, exist_ok=True)

        # Timestamped filename keeps successive reports from colliding.
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        report_path = os.path.join(output_dir, f"research_report_{stamp}.md")

        with open(report_path, 'w', encoding='utf-8') as handle:
            handle.write(content)

        return report_path

    except Exception as e:
        logging.getLogger(__name__).error(f"Failed to save markdown report: {str(e)}")
        raise
115
+
116
def convert_to_html(markdown_content: str) -> str:
    """Convert markdown to styled HTML and save to file

    Args:
        markdown_content: The markdown content to convert

    Returns:
        str: Path to the generated HTML file (under ``generated_reports/``)

    Raises:
        Exception: Re-raised after logging if rendering or file I/O fails.
    """
    try:
        # Initialize markdown parser (CommonMark dialect; 'html': True lets
        # raw HTML embedded in the markdown pass through to the output)
        md = MarkdownIt('commonmark', {'html': True})

        # Convert markdown to HTML
        html_content = md.render(markdown_content)

        # Add styling: wrap the rendered fragment in a full HTML document
        # with inline CSS. Doubled braces ({{ }}) are literal CSS braces
        # escaped for the f-string.
        styled_html = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="UTF-8">
            <style>
                body {{
                    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Arial, sans-serif;
                    line-height: 1.6;
                    max-width: 900px;
                    margin: 40px auto;
                    padding: 20px;
                    color: #333;
                }}
                h1, h2, h3 {{ color: #2c3e50; }}
                code {{
                    background-color: #f5f5f5;
                    padding: 2px 4px;
                    border-radius: 4px;
                    font-family: 'Consolas', 'Monaco', 'Andale Mono', monospace;
                }}
                pre {{
                    background-color: #f5f5f5;
                    padding: 15px;
                    border-radius: 8px;
                    overflow-x: auto;
                }}
                blockquote {{
                    border-left: 4px solid #2c3e50;
                    margin: 0;
                    padding-left: 20px;
                    color: #666;
                }}
                table {{
                    border-collapse: collapse;
                    width: 100%;
                    margin: 20px 0;
                }}
                th, td {{
                    border: 1px solid #ddd;
                    padding: 8px;
                    text-align: left;
                }}
                th {{ background-color: #f5f5f5; }}
                img {{ max-width: 100%; height: auto; }}
                .sources {{
                    margin-top: 40px;
                    padding-top: 20px;
                    border-top: 2px solid #eee;
                }}
            </style>
        </head>
        <body>
            {html_content}
        </body>
        </html>
        """

        # Create output directory if it doesn't exist
        os.makedirs("generated_reports", exist_ok=True)

        # Generate unique filename (timestamped to avoid collisions)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        html_path = os.path.join("generated_reports", f"report_{timestamp}.html")

        # Save HTML file
        with open(html_path, 'w', encoding='utf-8') as f:
            f.write(styled_html)

        return html_path

    except Exception as e:
        logger = logging.getLogger(__name__)
        logger.error(f"Failed to convert markdown to HTML: {str(e)}")
        raise