eliujl committed
Commit · 4117986
1 Parent(s): 3607afa

Updated models and retrieval number
Updated OpenAI models and Mistral models. Added a slider for the number of retrievals when using the retriever.
app.py CHANGED

@@ -23,12 +23,12 @@ import json
 OPENAI_API_KEY = ''
 PINECONE_API_KEY = ''
 PINECONE_API_ENV = ''
-gpt3p5 = 'gpt-3.5-turbo-
-gpt4 = 'gpt-4-
+gpt3p5 = 'gpt-3.5-turbo-0125'
+gpt4 = 'gpt-4-0125-preview'
 local_model_tuples = [
     (0, 'mistral_7b', "TheBloke/OpenHermes-2-Mistral-7B-GGUF", "openhermes-2-mistral-7b.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"),
-    (1, 'mistral_7b_inst_small', "TheBloke/Mistral-7B-Instruct-v0.
-    (2, 'mistral_7b_inst_med', "TheBloke/Mistral-7B-Instruct-v0.
+    (1, 'mistral_7b_inst_small', "TheBloke/Mistral-7B-Instruct-v0.2-GGUF", "mistral-7b-instruct-v0.2.Q2_K.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF"),
+    (2, 'mistral_7b_inst_med', "TheBloke/Mistral-7B-Instruct-v0.2-GGUF", "mistral-7b-instruct-v0.2.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF"),
     (3, 'llama_13b_small', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
     (4, 'llama_13b_med', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
     (5, 'mixtral', "TheBloke/Mixtral-8x7B-v0.1-GGUF", "mixtral-8x7b-v0.1.Q8_0.gguf", "mixtral", "https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF"),
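Each local_model_tuples entry bundles (index, short name, Hugging Face repo id, GGUF filename, prompt family, repo URL). For context, a minimal sketch of how such an entry could be resolved to a local model file; the fetch_local_model helper is an assumption for illustration, not code from this commit:

from huggingface_hub import hf_hub_download

# Hypothetical helper: resolve one local_model_tuples entry to a model path.
def fetch_local_model(entry):
    idx, name, repo_id, filename, family, url = entry
    # Downloads the quantized GGUF file (or reuses the local cache).
    return hf_hub_download(repo_id=repo_id, filename=filename)

# Example: resolve the OpenHermes-2 Mistral 7B Q8_0 file from the list above.
path = fetch_local_model(
    (0, 'mistral_7b', "TheBloke/OpenHermes-2-Mistral-7B-GGUF",
     "openhermes-2-mistral-7b.Q8_0.gguf", "mistral",
     "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"))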
@@ -320,7 +320,8 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
     temperature = st.slider('Temperature', 0.0, 1.0, 0.1)
     if usage == 'RAG':
         r_pinecone = st.radio('Vector store:', ('Pinecone (online)', 'Chroma (local)'))
-
+        k_retrieval = st.slider('# source chunk(s) to retrieve', 1, 80, 20)
+        k_sources = st.slider('# source chunk(s) to print', 0, 20, 2)
         r_ingest = st.radio('Ingest file(s)?', ('Yes', 'No'))
         if r_pinecone == 'Pinecone (online)':
             use_pinecone = True
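The diff adds the two sliders but not the code that consumes k_sources. A plausible sketch of printing the top retrieved chunks, under the assumption that the chain returns source documents; the 'result' stub and variable names are hypothetical, not from this commit:

import streamlit as st

# Stub standing in for the chain output when return_source_documents=True,
# so the sketch runs standalone.
result = {'source_documents': []}
k_sources = 2  # value the new '# source chunk(s) to print' slider would supply

# Print at most k_sources of the retrieved chunks.
for doc in result['source_documents'][:k_sources]:
    st.write(doc.metadata.get('source', 'unknown'))
    st.write(doc.page_content[:300])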
@@ -399,7 +400,7 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
     memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key='answer')
     if usage == 'RAG':
         # number of sources (split-documents when ingesting files); default is 4
-        k = min([
+        k = min([k_retrieval, n_texts])
         retriever = setup_retriever(docsearch, k)
         CRqa = ConversationalRetrievalChain.from_llm(
             llm,
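Capping k with min([k_retrieval, n_texts]) keeps the slider value from exceeding the number of ingested chunks, assuming n_texts holds that total. The body of setup_retriever is not part of this diff; a minimal sketch of what it plausibly does with a LangChain vector store, offered as an assumption rather than the Space's actual implementation:

# Hypothetical sketch of setup_retriever; the real function is not in this diff.
def setup_retriever(docsearch, k):
    # docsearch is assumed to be a LangChain vector store (Pinecone or Chroma);
    # search_kwargs={'k': k} caps how many chunks each query retrieves.
    return docsearch.as_retriever(search_kwargs={'k': k})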