biyootiful committed on
Commit c0da66d · 1 Parent(s): 9c28499

remove groq

Files changed (4)
  1. README.md +1 -2
  2. app.py +2 -51
  3. config.py +1 -7
  4. requirements.txt +1 -1
README.md CHANGED
@@ -12,8 +12,7 @@ RAG-based chatbot for answering questions about professional background and expe
 
 Set environment variables in Space secrets:
 
-- `LLM_PROVIDER` - Set to `local` (default), `groq`, or `huggingface`
-- `GROQ_API_KEY` - Required if using Groq
+- `LLM_PROVIDER` - Set to `local` (default) or `huggingface`
 - `HUGGINGFACE_API_KEY` - Required if using HuggingFace Inference API
 - `SESSION_TOKEN_SECRET` - Optional, for session auth
 - `CLIENT_APP_ORIGINS` - Optional, comma-separated allowed origins
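
With Groq removed, only the `huggingface` provider still requires a secret; `local` needs none. A minimal sketch of the startup check this implies (illustrative only; the repo's actual check lives in `initialize_llm` in app.py):

import os

LLM_PROVIDER = os.getenv("LLM_PROVIDER", "local")  # "local" or "huggingface"

# Fail fast at startup if the selected provider is missing its secret.
if LLM_PROVIDER == "huggingface" and not os.getenv("HUGGINGFACE_API_KEY"):
    raise RuntimeError("HUGGINGFACE_API_KEY must be set when LLM_PROVIDER=huggingface")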
app.py CHANGED
@@ -11,8 +11,6 @@ import time
 from typing import List, Dict, Optional, Tuple
 import numpy as np
 import torch
-import httpx
-import inspect
 from fastapi import Depends, FastAPI, HTTPException, Header
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
@@ -73,32 +71,9 @@ if not hasattr(huggingface_hub, "cached_download"):
 from sentence_transformers import SentenceTransformer
 import faiss
 
-# Patch httpx to gracefully ignore deprecated `proxies` argument used by groq client when running with httpx>=0.28.
-if "proxies" not in inspect.signature(httpx.Client.__init__).parameters:
-    _original_httpx_client_init = httpx.Client.__init__
-
-    def _httpx_client_init_with_proxies(self, *args, proxies=None, **kwargs):
-        return _original_httpx_client_init(self, *args, **kwargs)
-
-    httpx.Client.__init__ = _httpx_client_init_with_proxies  # type: ignore[assignment]
-
-if "proxies" not in inspect.signature(httpx.AsyncClient.__init__).parameters:
-    _original_httpx_async_client_init = httpx.AsyncClient.__init__
-
-    def _httpx_async_client_init_with_proxies(self, *args, proxies=None, **kwargs):
-        if proxies is not None and "proxy" not in kwargs:
-            kwargs["proxy"] = proxies
-        return _original_httpx_async_client_init(self, *args, **kwargs)
-
-    httpx.AsyncClient.__init__ = _httpx_async_client_init_with_proxies  # type: ignore[assignment]
-
-from groq import Groq
-
 # Import configuration
 from config import (
     LLM_PROVIDER,
-    GROQ_API_KEY,
-    GROQ_MODEL,
     HUGGINGFACE_API_KEY,
     HUGGINGFACE_MODEL,
     LOCAL_MODEL_REPO,
@@ -375,12 +350,7 @@ def initialize_llm():
     """Initialize LLM client based on provider"""
     global llm_client, local_model_path
 
-    if LLM_PROVIDER == "groq":
-        if not GROQ_API_KEY:
-            raise ValueError("GROQ_API_KEY not set in environment variables")
-        llm_client = Groq(api_key=GROQ_API_KEY)
-        print(f"Initialized Groq client with model: {GROQ_MODEL}")
-    elif LLM_PROVIDER == "huggingface":
+    if LLM_PROVIDER == "huggingface":
         # Will use requests for HF Inference API
         if not HUGGINGFACE_API_KEY:
             raise ValueError("HUGGINGFACE_API_KEY not set in environment variables")
@@ -453,23 +423,6 @@ def retrieve_relevant_chunks(query: str, top_k: int = TOP_K_RESULTS) -> List[str
     return relevant_chunks
 
 
-def generate_response_groq(prompt: str) -> str:
-    """Generate response using Groq API"""
-    try:
-        chat_completion = llm_client.chat.completions.create(
-            messages=[
-                {"role": "system", "content": SYSTEM_PROMPT},
-                {"role": "user", "content": prompt}
-            ],
-            model=GROQ_MODEL,
-            temperature=0.7,
-            max_tokens=500,
-        )
-        return chat_completion.choices[0].message.content
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Groq API error: {str(e)}")
-
-
 def generate_response_huggingface(prompt: str) -> str:
     """Generate response using HuggingFace Inference API (OpenAI-compatible endpoint)."""
     import requests
@@ -586,9 +539,7 @@ Provide a concise, professional answer based only on the context above."""
 
     combined_prompt = f"{system_prompt}\n\n{user_prompt}"
 
-    if LLM_PROVIDER == "groq":
-        return generate_response_groq(combined_prompt)
-    elif LLM_PROVIDER == "huggingface":
+    if LLM_PROVIDER == "huggingface":
         return generate_response_huggingface(combined_prompt)
     elif LLM_PROVIDER == "local":
         return generate_response_local(system_prompt, user_prompt)
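
The diff shows only the signature and docstring of generate_response_huggingface. For context, a minimal sketch of what an OpenAI-compatible chat-completions call via requests might look like; the endpoint URL, payload fields, and error handling below are assumptions, not the repo's actual body (the sampling parameters mirror the removed Groq call):

import requests

HUGGINGFACE_API_KEY = "hf_..."              # from Space secrets (placeholder)
HUGGINGFACE_MODEL = "google/gemma-2-2b-it"  # matches config.py

def generate_response_huggingface(prompt: str) -> str:
    """Sketch: POST to an OpenAI-compatible HF chat-completions endpoint."""
    url = "https://router.huggingface.co/v1/chat/completions"  # assumed endpoint
    resp = requests.post(
        url,
        headers={"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"},
        json={
            "model": HUGGINGFACE_MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 500,
            "temperature": 0.7,
        },
        timeout=60,
    )
    resp.raise_for_status()
    # OpenAI-compatible response shape: choices[0].message.content
    return resp.json()["choices"][0]["message"]["content"]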
config.py CHANGED
@@ -6,19 +6,13 @@ Change LLM_PROVIDER to switch between different models
 import os
 
 # Swappable LLM provider (environment configurable)
-LLM_PROVIDER = os.getenv("LLM_PROVIDER", "huggingface")  # Options: "groq", "huggingface", "openai", "local"
+LLM_PROVIDER = os.getenv("LLM_PROVIDER", "local")  # Options: "huggingface", "local"
 
 # API Keys (set these as environment variables in HuggingFace Space secrets)
-GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
 HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "")
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
 
 # Model configurations
-GROQ_MODEL = "mixtral-8x7b-32768"  # Fast and good quality
-# GROQ_MODEL = "llama3-8b-8192"  # Alternative: faster but slightly lower quality
-
 HUGGINGFACE_MODEL = "google/gemma-2-2b-it"
-OPENAI_MODEL = "gpt-3.5-turbo"
 
 # Local model configuration (for quantized models hosted within the Space)
 LOCAL_MODEL_REPO = os.getenv("LOCAL_MODEL_REPO", "tensorblock/gemma-2-2b-it-GGUF")
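
LOCAL_MODEL_REPO points at a GGUF repo and requirements.txt pins llama-cpp-python, so the default `local` provider presumably downloads a quantized file and runs it in-process. A hedged sketch under those assumptions (the GGUF filename is a placeholder; the repo's real loading code lives in app.py and is not shown in this diff):

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download one quantized file from the configured GGUF repo.
model_path = hf_hub_download(
    repo_id="tensorblock/gemma-2-2b-it-GGUF",  # LOCAL_MODEL_REPO default
    filename="gemma-2-2b-it-Q4_K_M.gguf",      # hypothetical quant filename
)

# Load the model in-process; n_ctx is an illustrative context size.
llm = Llama(model_path=model_path, n_ctx=2048)

out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=128,
)
print(out["choices"][0]["message"]["content"])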
requirements.txt CHANGED
@@ -3,9 +3,9 @@ uvicorn[standard]==0.24.0
 sentence-transformers==2.2.2
 huggingface-hub<0.19
 faiss-cpu==1.8.0
-httpx<0.28
 pydantic==2.5.0
 numpy>=1.26.4,<2
 python-multipart==0.0.6
 llama-cpp-python==0.2.82
 itsdangerous==2.2.0
+requests==2.31.0