Commit c0da66d · remove groq
Parent(s): 9c28499
README.md CHANGED
@@ -12,8 +12,7 @@ RAG-based chatbot for answering questions about professional background and expe
 
 Set environment variables in Space secrets:
 
-- `LLM_PROVIDER` - Set to `local` (default)
-- `GROQ_API_KEY` - Required if using Groq
+- `LLM_PROVIDER` - Set to `local` (default) or `huggingface`
 - `HUGGINGFACE_API_KEY` - Required if using HuggingFace Inference API
 - `SESSION_TOKEN_SECRET` - Optional, for session auth
 - `CLIENT_APP_ORIGINS` - Optional, comma-separated allowed origins
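As an aside on the last secret: `CLIENT_APP_ORIGINS` is a comma-separated origin list. A minimal sketch of how such a value is commonly parsed and handed to FastAPI's CORSMiddleware (only the variable name comes from the README; the parsing and middleware settings below are illustrative assumptions, not this repo's code):

```python
# Hypothetical sketch: feeding a comma-separated CLIENT_APP_ORIGINS value
# into FastAPI's CORS middleware. Settings here are illustrative.
import os

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()

# e.g. CLIENT_APP_ORIGINS="https://example.com,https://app.example.com"
origins = [o.strip() for o in os.getenv("CLIENT_APP_ORIGINS", "").split(",") if o.strip()]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,      # only the listed origins may call the API
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
```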
app.py CHANGED
@@ -11,8 +11,6 @@ import time
 from typing import List, Dict, Optional, Tuple
 import numpy as np
 import torch
-import httpx
-import inspect
 from fastapi import Depends, FastAPI, HTTPException, Header
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
@@ -73,32 +71,9 @@ if not hasattr(huggingface_hub, "cached_download"):
 from sentence_transformers import SentenceTransformer
 import faiss
 
-# Patch httpx to gracefully ignore deprecated `proxies` argument used by groq client when running with httpx>=0.28.
-if "proxies" not in inspect.signature(httpx.Client.__init__).parameters:
-    _original_httpx_client_init = httpx.Client.__init__
-
-    def _httpx_client_init_with_proxies(self, *args, proxies=None, **kwargs):
-        return _original_httpx_client_init(self, *args, **kwargs)
-
-    httpx.Client.__init__ = _httpx_client_init_with_proxies  # type: ignore[assignment]
-
-if "proxies" not in inspect.signature(httpx.AsyncClient.__init__).parameters:
-    _original_httpx_async_client_init = httpx.AsyncClient.__init__
-
-    def _httpx_async_client_init_with_proxies(self, *args, proxies=None, **kwargs):
-        if proxies is not None and "proxy" not in kwargs:
-            kwargs["proxy"] = proxies
-        return _original_httpx_async_client_init(self, *args, **kwargs)
-
-    httpx.AsyncClient.__init__ = _httpx_async_client_init_with_proxies  # type: ignore[assignment]
-
-from groq import Groq
-
 # Import configuration
 from config import (
     LLM_PROVIDER,
-    GROQ_API_KEY,
-    GROQ_MODEL,
     HUGGINGFACE_API_KEY,
     HUGGINGFACE_MODEL,
     LOCAL_MODEL_REPO,
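The removed patch relied on a general compatibility-shim idiom: check a callable's signature with `inspect.signature` and only monkeypatch when the keyword is missing. A self-contained sketch of that idiom, using a toy function rather than `httpx` (illustrative only, not this repo's code):

```python
# Minimal sketch of the signature-inspection shim pattern from the removed
# block: wrap a callable so an obsolete keyword argument is silently dropped.
import inspect

def drop_obsolete_kwarg(func, kwarg: str):
    """Return func unchanged if it already accepts `kwarg`; otherwise wrap
    it so the keyword is accepted and discarded instead of raising TypeError."""
    if kwarg in inspect.signature(func).parameters:
        return func

    def wrapper(*args, **kwargs):
        kwargs.pop(kwarg, None)  # discard the unsupported keyword
        return func(*args, **kwargs)

    return wrapper

# Example: greet() predates the `color` option some callers still pass.
def greet(name: str) -> str:
    return f"hello {name}"

greet = drop_obsolete_kwarg(greet, "color")
print(greet("world", color="red"))  # -> "hello world"
```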
@@ -375,12 +350,7 @@ def initialize_llm():
     """Initialize LLM client based on provider"""
     global llm_client, local_model_path
 
-    if LLM_PROVIDER == "groq":
-        if not GROQ_API_KEY:
-            raise ValueError("GROQ_API_KEY not set in environment variables")
-        llm_client = Groq(api_key=GROQ_API_KEY)
-        print(f"Initialized Groq client with model: {GROQ_MODEL}")
-    elif LLM_PROVIDER == "huggingface":
+    if LLM_PROVIDER == "huggingface":
         # Will use requests for HF Inference API
         if not HUGGINGFACE_API_KEY:
             raise ValueError("HUGGINGFACE_API_KEY not set in environment variables")
@@ -453,23 +423,6 @@ def retrieve_relevant_chunks(query: str, top_k: int = TOP_K_RESULTS) -> List[str
     return relevant_chunks
 
 
-def generate_response_groq(prompt: str) -> str:
-    """Generate response using Groq API"""
-    try:
-        chat_completion = llm_client.chat.completions.create(
-            messages=[
-                {"role": "system", "content": SYSTEM_PROMPT},
-                {"role": "user", "content": prompt}
-            ],
-            model=GROQ_MODEL,
-            temperature=0.7,
-            max_tokens=500,
-        )
-        return chat_completion.choices[0].message.content
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Groq API error: {str(e)}")
-
-
 def generate_response_huggingface(prompt: str) -> str:
     """Generate response using HuggingFace Inference API (OpenAI-compatible endpoint)."""
     import requests
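The surviving `generate_response_huggingface` is described as calling an OpenAI-compatible HuggingFace Inference endpoint via `requests` (which explains the new `requests==2.31.0` pin below). Its body is not part of this diff; the following is a hedged sketch of what such a call typically looks like, where the endpoint URL, payload shape, and sampling values are assumptions rather than this repo's code:

```python
# Hypothetical sketch of an OpenAI-compatible chat-completions call to the
# HF Inference API using requests. URL and payload shape are assumptions.
import os
import requests

HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
HUGGINGFACE_MODEL = "google/gemma-2-2b-it"  # matches config.py

def generate(prompt: str) -> str:
    resp = requests.post(
        "https://router.huggingface.co/v1/chat/completions",  # assumed endpoint
        headers={"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"},
        json={
            "model": HUGGINGFACE_MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 500,
            "temperature": 0.7,
        },
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]
```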
@@ -586,9 +539,7 @@ Provide a concise, professional answer based only on the context above."""
 
     combined_prompt = f"{system_prompt}\n\n{user_prompt}"
 
-    if LLM_PROVIDER == "groq":
-        return generate_response_groq(combined_prompt)
-    elif LLM_PROVIDER == "huggingface":
+    if LLM_PROVIDER == "huggingface":
         return generate_response_huggingface(combined_prompt)
     elif LLM_PROVIDER == "local":
         return generate_response_local(system_prompt, user_prompt)
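With Groq gone, the provider chain reduces to a two-branch if/elif. When more providers accumulate, the same dispatch is sometimes written as a lookup table instead; a self-contained sketch with stub handlers (illustrative, not the repo's code):

```python
# Illustrative alternative: table-driven provider dispatch. The real
# handlers live in app.py; they are stubbed here so the sketch runs.
LLM_PROVIDER = "local"

def generate_response_huggingface(prompt: str) -> str:
    return f"[hf] {prompt}"  # stand-in for the real API call

def generate_response_local(system_prompt: str, user_prompt: str) -> str:
    return f"[local] {user_prompt}"  # stand-in for llama.cpp generation

HANDLERS = {
    "huggingface": lambda s, u: generate_response_huggingface(f"{s}\n\n{u}"),
    "local": generate_response_local,
}

def generate(system_prompt: str, user_prompt: str) -> str:
    try:
        handler = HANDLERS[LLM_PROVIDER]
    except KeyError:
        raise ValueError(f"Unsupported LLM_PROVIDER: {LLM_PROVIDER!r}")
    return handler(system_prompt, user_prompt)

print(generate("You are concise.", "Summarize RAG in one line."))
```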
config.py CHANGED
@@ -6,19 +6,13 @@ Change LLM_PROVIDER to switch between different models
 import os
 
 # Swappable LLM provider (environment configurable)
-LLM_PROVIDER = os.getenv("LLM_PROVIDER", "local")  # Options: "groq", "huggingface", "local"
+LLM_PROVIDER = os.getenv("LLM_PROVIDER", "local")  # Options: "huggingface", "local"
 
 # API Keys (set these as environment variables in HuggingFace Space secrets)
-GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
 HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "")
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
 
 # Model configurations
-GROQ_MODEL = "mixtral-8x7b-32768"  # Fast and good quality
-# GROQ_MODEL = "llama3-8b-8192"  # Alternative: faster but slightly lower quality
-
 HUGGINGFACE_MODEL = "google/gemma-2-2b-it"
-OPENAI_MODEL = "gpt-3.5-turbo"
 
 # Local model configuration (for quantized models hosted within the Space)
 LOCAL_MODEL_REPO = os.getenv("LOCAL_MODEL_REPO", "tensorblock/gemma-2-2b-it-GGUF")
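`LOCAL_MODEL_REPO` points at a GGUF quantization repo, and `requirements.txt` pins `llama-cpp-python`, which suggests the `local` provider downloads a quantized file from the Hub and runs it with llama.cpp. A hedged sketch of that flow, where the exact `.gguf` filename, context size, and sampling values are assumptions rather than this repo's code:

```python
# Hypothetical sketch: fetch a GGUF file from LOCAL_MODEL_REPO and load it
# with llama-cpp-python. The filename below is an assumption.
import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

LOCAL_MODEL_REPO = os.getenv("LOCAL_MODEL_REPO", "tensorblock/gemma-2-2b-it-GGUF")

# Download one quantized weight file from the repo.
model_path = hf_hub_download(
    repo_id=LOCAL_MODEL_REPO,
    filename="gemma-2-2b-it-Q4_K_M.gguf",  # assumed quantization filename
)

llm = Llama(model_path=model_path, n_ctx=2048)

out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=64,
)
print(out["choices"][0]["message"]["content"])
```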
requirements.txt CHANGED
@@ -3,9 +3,9 @@ uvicorn[standard]==0.24.0
 sentence-transformers==2.2.2
 huggingface-hub<0.19
 faiss-cpu==1.8.0
-httpx<0.28
 pydantic==2.5.0
 numpy>=1.26.4,<2
 python-multipart==0.0.6
 llama-cpp-python==0.2.82
 itsdangerous==2.2.0
+requests==2.31.0