Spaces:
Paused
Paused
lanny xu
committed on
Commit
·
82227df
1
Parent(s):
1057c00
modifies bug
Browse files
- .env +2 -30
- .gitignore +1 -0
- config.py +16 -11
.env
CHANGED
|
@@ -1,31 +1,3 @@
|
|
|
|
|
| 1 |
TAVILY_API_KEY="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"
|
| 2 |
-
# NOMIC_API_KEY="nk-kt4Tu3UdwFpIlDdxLcd9AK3a7cfdAKhoXvPbJ78oVlE"
|
| 3 |
-
|
| 4 |
-
# 混合检索配置
|
| 5 |
-
ENABLE_HYBRID_SEARCH=true
|
| 6 |
-
BM25_K1=1.5
|
| 7 |
-
BM25_B=0.75
|
| 8 |
-
ENSEMBLE_WEIGHTS=[0.5, 0.5]
|
| 9 |
-
|
| 10 |
-
# 查询扩展配置
|
| 11 |
-
ENABLE_QUERY_EXPANSION=true
|
| 12 |
-
QUERY_EXPANSION_MODEL="all-MiniLM-L6-v2"
|
| 13 |
-
QUERY_EXPANSION_TOP_K=5
|
| 14 |
-
|
| 15 |
-
# 多模态配置
|
| 16 |
-
ENABLE_MULTIMODAL=true
|
| 17 |
-
MULTIMODAL_MODEL="openai/clip-vit-base-patch32"
|
| 18 |
-
MULTIMODAL_IMAGE_MODEL="openai/clip-vit-base-patch32"
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
# GraphRAG配置
|
| 25 |
-
ENABLE_GRAPH_RAG=true
|
| 26 |
-
GRAPH_ENTITY_EXTRACTION_MODEL="llama2"
|
| 27 |
-
GRAPH_RELATION_EXTRACTION_MODEL="llama2"
|
| 28 |
-
GRAPH_COMMUNITY_DETECTION=true
|
| 29 |
-
GRAPH_COMMUNITY_ALGORITHM="louvain"
|
| 30 |
-
GRAPH_VISUALIZATION=true
|
| 31 |
-
GRAPH_LAYOUT="spring"
|
|
|
|
| 1 |
+
# API密钥配置
|
| 2 |
TAVILY_API_KEY="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"
|
| 3 |
+
# NOMIC_API_KEY="nk-kt4Tu3UdwFpIlDdxLcd9AK3a7cfdAKhoXvPbJ78oVlE"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
CHANGED
|
@@ -31,6 +31,7 @@ wheels/
|
|
| 31 |
MANIFEST
|
| 32 |
|
| 33 |
# 虚拟环境
|
|
|
|
| 34 |
.venv
|
| 35 |
env/
|
| 36 |
venv/
|
|
|
|
| 31 |
MANIFEST
|
| 32 |
|
| 33 |
# 虚拟环境
|
| 34 |
+
.env
|
| 35 |
.venv
|
| 36 |
env/
|
| 37 |
venv/
|
config.py
CHANGED
|
@@ -64,35 +64,40 @@ EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2" # HuggingFace嵌入
|
|
| 64 |
WEB_SEARCH_RESULTS_COUNT = 3
|
| 65 |
|
| 66 |
# GraphRAG配置
|
| 67 |
-
ENABLE_GRAPHRAG =
|
| 68 |
GRAPHRAG_INDEX_PATH = "./data/knowledge_graph.json" # 图谱索引保存路径
|
| 69 |
|
| 70 |
# 确保数据目录存在
|
| 71 |
import os
|
| 72 |
os.makedirs("./data", exist_ok=True)
|
| 73 |
-
GRAPHRAG_COMMUNITY_ALGORITHM = "louvain" # 社区检测算法: louvain, greedy, label_propagation
|
| 74 |
GRAPHRAG_MAX_HOPS = 2 # 本地查询最大跳数
|
| 75 |
GRAPHRAG_TOP_K_COMMUNITIES = 5 # 全局查询使用的社区数量
|
| 76 |
GRAPHRAG_BATCH_SIZE = 10 # 实体提取批处理大小
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
# 混合检索策略配置
|
| 79 |
-
ENABLE_HYBRID_SEARCH =
|
| 80 |
-
HYBRID_SEARCH_WEIGHTS = {"vector": 0.
|
| 81 |
KEYWORD_SEARCH_K = 5 # 关键词检索返回的文档数量
|
| 82 |
-
BM25_K1 = 1.
|
| 83 |
-
BM25_B = 0.75 # BM25算法的b参数
|
| 84 |
|
| 85 |
# 查询扩展优化配置
|
| 86 |
-
ENABLE_QUERY_EXPANSION =
|
| 87 |
-
QUERY_EXPANSION_MODEL = "mistral" # 用于查询扩展的模型
|
| 88 |
QUERY_EXPANSION_PROMPT = """请为以下查询生成3-5个相关的扩展查询,这些查询应该从不同角度探索原始查询的主题。
|
| 89 |
原始查询: {query}
|
| 90 |
扩展查询: """ # 查询扩展提示模板
|
| 91 |
-
MAX_EXPANDED_QUERIES =
|
| 92 |
|
| 93 |
# 多模态支持配置
|
| 94 |
-
ENABLE_MULTIMODAL =
|
| 95 |
-
MULTIMODAL_IMAGE_MODEL = "openai/clip-vit-base-patch32" # 图像嵌入模型
|
| 96 |
SUPPORTED_IMAGE_FORMATS = ["jpg", "jpeg", "png", "gif", "bmp"] # 支持的图像格式
|
| 97 |
IMAGE_EMBEDDING_DIM = 512 # 图像嵌入维度
|
| 98 |
MULTIMODAL_WEIGHTS = {"text": 0.7, "image": 0.3} # 文本和图像检索的权重
|
|
|
|
| 64 |
# Feature configuration for web search, GraphRAG, hybrid retrieval, query
# expansion and multimodal support.  Most switches can be overridden through
# environment variables; the literal defaults below apply when a variable is
# not set.

# NOTE(review): the original fragment read os.environ several lines before its
# own `import os` statement.  Importing here, ahead of the first use, keeps the
# module importable regardless of what the earlier part of the file imports.
import os


def _env_flag(name, default="true"):
    """Return True when environment variable ``name`` equals "true".

    The comparison ignores letter case and surrounding whitespace.  Any other
    value (including an empty string) yields False.  ``default`` is the text
    used when the variable is not set.
    """
    return os.environ.get(name, default).strip().lower() == "true"


def _env_number(name, default, cast):
    """Read environment variable ``name`` and convert it with ``cast``.

    ``default`` is the textual fallback used when the variable is not set.

    Raises:
        ValueError: if the text cannot be converted; the message names the
            offending variable so a bad setting is easy to locate.
    """
    raw = os.environ.get(name, default)
    try:
        return cast(raw)
    except ValueError as err:
        raise ValueError(
            "Invalid value for environment variable {}: {!r}".format(name, raw)
        ) from err


WEB_SEARCH_RESULTS_COUNT = 3

# GraphRAG configuration
# The switch is read from ENABLE_GRAPH_RAG (note the extra underscore compared
# with the Python constant name).
ENABLE_GRAPHRAG = _env_flag("ENABLE_GRAPH_RAG")  # whether GraphRAG is enabled
GRAPHRAG_INDEX_PATH = "./data/knowledge_graph.json"  # where the graph index is saved

# Make sure the data directory exists
os.makedirs("./data", exist_ok=True)
# Community detection algorithm: louvain, greedy, label_propagation
GRAPHRAG_COMMUNITY_ALGORITHM = os.environ.get("GRAPH_COMMUNITY_ALGORITHM", "louvain")
GRAPHRAG_MAX_HOPS = 2  # maximum number of hops for local queries
GRAPHRAG_TOP_K_COMMUNITIES = 5  # number of communities used by global queries
GRAPHRAG_BATCH_SIZE = 10  # batch size for entity extraction
GRAPH_ENTITY_EXTRACTION_MODEL = os.environ.get("GRAPH_ENTITY_EXTRACTION_MODEL", "llama2")
GRAPH_RELATION_EXTRACTION_MODEL = os.environ.get("GRAPH_RELATION_EXTRACTION_MODEL", "llama2")
GRAPH_COMMUNITY_DETECTION = _env_flag("GRAPH_COMMUNITY_DETECTION")
GRAPH_VISUALIZATION = _env_flag("GRAPH_VISUALIZATION")
GRAPH_LAYOUT = os.environ.get("GRAPH_LAYOUT", "spring")

# Hybrid retrieval strategy configuration
ENABLE_HYBRID_SEARCH = _env_flag("ENABLE_HYBRID_SEARCH")  # whether hybrid retrieval is enabled
HYBRID_SEARCH_WEIGHTS = {"vector": 0.5, "keyword": 0.5}  # weights of vector vs. keyword retrieval
KEYWORD_SEARCH_K = 5  # number of documents returned by keyword retrieval
BM25_K1 = _env_number("BM25_K1", "1.5", float)  # k1 parameter of the BM25 algorithm
BM25_B = _env_number("BM25_B", "0.75", float)  # b parameter of the BM25 algorithm

# Query expansion configuration
ENABLE_QUERY_EXPANSION = _env_flag("ENABLE_QUERY_EXPANSION")  # whether query expansion is enabled
QUERY_EXPANSION_MODEL = os.environ.get("QUERY_EXPANSION_MODEL", "mistral")  # model used for query expansion
# Prompt template for query expansion; {query} is the placeholder for the
# original query text.
QUERY_EXPANSION_PROMPT = """请为以下查询生成3-5个相关的扩展查询,这些查询应该从不同角度探索原始查询的主题。
原始查询: {query}
扩展查询: """
# Maximum number of expanded queries to use; read from QUERY_EXPANSION_TOP_K.
MAX_EXPANDED_QUERIES = _env_number("QUERY_EXPANSION_TOP_K", "5", int)

# Multimodal support configuration
ENABLE_MULTIMODAL = _env_flag("ENABLE_MULTIMODAL")  # whether multimodal support is enabled
MULTIMODAL_IMAGE_MODEL = os.environ.get("MULTIMODAL_IMAGE_MODEL", "openai/clip-vit-base-patch32")  # image embedding model
SUPPORTED_IMAGE_FORMATS = ["jpg", "jpeg", "png", "gif", "bmp"]  # supported image formats
IMAGE_EMBEDDING_DIM = 512  # image embedding dimension
MULTIMODAL_WEIGHTS = {"text": 0.7, "image": 0.3}  # weights of text vs. image retrieval
|