eliujl committed
Commit · 4117986
1 Parent(s): 3607afa

Updated models and retrieval number
Updated OpenAI models and Mistral models. Added a slider for the number of retrievals when using the retriever.
app.py CHANGED

@@ -23,12 +23,12 @@ import json
 OPENAI_API_KEY = ''
 PINECONE_API_KEY = ''
 PINECONE_API_ENV = ''
-gpt3p5 = 'gpt-3.5-turbo-
-gpt4 = 'gpt-4-
+gpt3p5 = 'gpt-3.5-turbo-0125'
+gpt4 = 'gpt-4-0125-preview'
 local_model_tuples = [
     (0, 'mistral_7b', "TheBloke/OpenHermes-2-Mistral-7B-GGUF", "openhermes-2-mistral-7b.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"),
-    (1, 'mistral_7b_inst_small', "TheBloke/Mistral-7B-Instruct-v0.
-    (2, 'mistral_7b_inst_med', "TheBloke/Mistral-7B-Instruct-v0.
+    (1, 'mistral_7b_inst_small', "TheBloke/Mistral-7B-Instruct-v0.2-GGUF", "mistral-7b-instruct-v0.2.Q2_K.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF"),
+    (2, 'mistral_7b_inst_med', "TheBloke/Mistral-7B-Instruct-v0.2-GGUF", "mistral-7b-instruct-v0.2.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF"),
     (3, 'llama_13b_small', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
     (4, 'llama_13b_med', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
     (5, 'mixtral', "TheBloke/Mixtral-8x7B-v0.1-GGUF", "mixtral-8x7b-v0.1.Q8_0.gguf", "mixtral", "https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF"),
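Each local_model_tuples entry bundles (index, short name, Hugging Face repo id, GGUF filename, prompt family, repo URL). For context, a minimal sketch of how such an entry could be resolved to a local model file; the fetch_local_model helper is an assumption for illustration, not code from this commit:

from huggingface_hub import hf_hub_download

# Hypothetical helper: resolve one local_model_tuples entry to a model path.
def fetch_local_model(entry):
    idx, name, repo_id, filename, family, url = entry
    # Downloads the quantized GGUF file (or reuses the local cache).
    return hf_hub_download(repo_id=repo_id, filename=filename)

# Example: resolve the OpenHermes-2 Mistral 7B Q8_0 file from the list above.
path = fetch_local_model(
    (0, 'mistral_7b', "TheBloke/OpenHermes-2-Mistral-7B-GGUF",
     "openhermes-2-mistral-7b.Q8_0.gguf", "mistral",
     "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"))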
@@ -320,7 +320,8 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
     temperature = st.slider('Temperature', 0.0, 1.0, 0.1)
     if usage == 'RAG':
         r_pinecone = st.radio('Vector store:', ('Pinecone (online)', 'Chroma (local)'))
-
+        k_retrieval = st.slider('# source chunk(s) to retrieve', 1, 80, 20)
+        k_sources = st.slider('# source chunk(s) to print', 0, 20, 2)
         r_ingest = st.radio('Ingest file(s)?', ('Yes', 'No'))
         if r_pinecone == 'Pinecone (online)':
             use_pinecone = True
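The diff adds the two sliders but not the code that consumes k_sources. A plausible sketch of printing the top retrieved chunks, under the assumption that the chain returns source documents; the 'result' stub and variable names are hypothetical, not from this commit:

import streamlit as st

# Stub standing in for the chain output when return_source_documents=True,
# so the sketch runs standalone.
result = {'source_documents': []}
k_sources = 2  # value the new '# source chunk(s) to print' slider would supply

# Print at most k_sources of the retrieved chunks.
for doc in result['source_documents'][:k_sources]:
    st.write(doc.metadata.get('source', 'unknown'))
    st.write(doc.page_content[:300])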
@@ -399,7 +400,7 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
     memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key='answer')
     if usage == 'RAG':
         # number of sources (split-documents when ingesting files); default is 4
-        k = min([
+        k = min([k_retrieval, n_texts])
         retriever = setup_retriever(docsearch, k)
         CRqa = ConversationalRetrievalChain.from_llm(
             llm,
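Capping k with min([k_retrieval, n_texts]) keeps the slider value from exceeding the number of ingested chunks, assuming n_texts holds that total. The body of setup_retriever is not part of this diff; a minimal sketch of what it plausibly does with a LangChain vector store, offered as an assumption rather than the Space's actual implementation:

# Hypothetical sketch of setup_retriever; the real function is not in this diff.
def setup_retriever(docsearch, k):
    # docsearch is assumed to be a LangChain vector store (Pinecone or Chroma);
    # search_kwargs={'k': k} caps how many chunks each query retrieves.
    return docsearch.as_retriever(search_kwargs={'k': k})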