# local-knowledge-base **Repository Path**: badboy2064/local-knowledge-base ## Basic Information - **Project Name**: local-knowledge-base - **Description**: 基于 FastAPI + ChromaDB + LangChain 的本地知识库问答系统,支持文档向量化存储和流式问答。 - **Primary Language**: Unknown - **License**: Apache-2.0 - **Default Branch**: master - **Homepage**: None - **GVP Project**: No ## Statistics - **Stars**: 0 - **Forks**: 0 - **Created**: 2026-04-26 - **Last Updated**: 2026-04-26 ## Categories & Tags **Categories**: Uncategorized **Tags**: None ## README # 本地知识库问答系统 基于 FastAPI + ChromaDB + LangChain 的本地知识库问答系统,支持文档向量化存储和流式问答。 ## 功能特性 - 文档向量化:支持 PDF、TXT、Markdown、Word 文档入库 - 双模式 Embedding:支持 OpenAI API 或本地 Ollama (BGE-M3) - 流式问答:类似 ChatGPT 的逐字显示效果 - Web 界面:内置可视化问答页面 - 来源追溯:回答显示引用的知识库文档 ## 技术栈 - **框架**: FastAPI + Uvicorn - **向量库**: ChromaDB - **LLM 框架**: LangChain - **文档处理**: PyPDF2, python-docx - **包管理**: UV ## 项目结构 ``` . ├── app/ │ ├── __init__.py │ ├── main.py # FastAPI 应用入口 │ ├── config.py # 配置管理 │ ├── api/ │ │ ├── __init__.py │ │ └── routes.py # API 路由(文档上传、问答、流式接口) │ ├── db/ │ │ ├── __init__.py │ │ └── vector_store.py # ChromaDB 向量存储操作 │ ├── ingestion/ │ │ ├── __init__.py │ │ └── loader.py # 文档加载与文本分割 │ ├── templates/ │ │ └── chat.html # Web 问答界面 │ └── static/ # 静态资源 ├── docs/ # 知识库文档目录 ├── chroma_db/ # ChromaDB 持久化数据 ├── .env # 环境变量配置 ├── .env.example # 环境变量模板 ├── pyproject.toml # UV 项目配置 └── uv.lock # 依赖锁定文件 ``` ## 快速开始 ### 1. 安装依赖 ```bash # 使用 UV 安装依赖 uv sync ``` ### 2. 
配置环境变量 复制 `.env.example` 为 `.env`,并填写配置: ```env # LLM 配置(对话模型) OPENAI_API_KEY=your-llm-api-key OPENAI_BASE_URL=https://api.openai.com/v1 LLM_MODEL=gpt-4o-mini # Embedding 配置 # 支持两种模式: "openai" 或 "ollama" EMBEDDING_PROVIDER=ollama # OpenAI 模式配置 EMBEDDING_API_KEY=your-openai-api-key EMBEDDING_BASE_URL=https://api.openai.com/v1 EMBEDDING_MODEL=text-embedding-3-large # Ollama 模式配置(本地 Embedding) OLLAMA_BASE_URL=http://localhost:11434 OLLAMA_EMBEDDING_MODEL=bge-m3 # 其他配置 CHROMA_PERSIST_DIR=./chroma_db DOCS_DIR=./docs RETRIEVAL_TOP_K=4 SERVER_PORT=8000 ``` ### 3. 启动 Ollama(如使用本地 Embedding) ```bash # 安装并启动 Ollama ollama pull bge-m3 ollama serve ``` ### 4. 启动服务 ```bash uv run python app/main.py ``` 访问 http://localhost:8000 使用 Web 界面 ## 核心代码 ### 配置管理 (app/config.py) ```python from pydantic_settings import BaseSettings class Settings(BaseSettings): # LLM 配置 openai_api_key: str = "" openai_base_url: str = "https://api.openai.com/v1" llm_model: str = "gpt-4o-mini" # Embedding 配置 embedding_provider: str = "ollama" # "openai" 或 "ollama" embedding_api_key: str = "" embedding_base_url: str = "https://api.openai.com/v1" embedding_model: str = "text-embedding-3-large" ollama_base_url: str = "http://localhost:11434" ollama_embedding_model: str = "bge-m3" chroma_persist_dir: str = "./chroma_db" docs_dir: str = "./docs" retrieval_top_k: int = 4 server_port: int = 8000 model_config = {"env_file": ".env", "env_file_encoding": "utf-8"} settings = Settings() ``` ### 向量存储 (app/db/vector_store.py) ```python from langchain_chroma import Chroma from langchain.schema import Document from app.config import settings _embeddings = None _vector_store = None def _get_embeddings(): global _embeddings if _embeddings is None: if settings.embedding_provider == "ollama": from langchain_ollama import OllamaEmbeddings _embeddings = OllamaEmbeddings( model=settings.ollama_embedding_model, base_url=settings.ollama_base_url, ) else: from langchain_openai import OpenAIEmbeddings _embeddings = OpenAIEmbeddings( 
model=settings.embedding_model, openai_api_key=settings.embedding_api_key, openai_api_base=settings.embedding_base_url, ) return _embeddings def get_vector_store() -> Chroma: global _vector_store if _vector_store is None: _vector_store = Chroma( collection_name="knowledge_base", embedding_function=_get_embeddings(), persist_directory=settings.chroma_persist_dir, ) return _vector_store def add_documents(documents: list[Document]): store = get_vector_store() store.add_documents(documents) def similarity_search(query: str, k: int = None) -> list[Document]: store = get_vector_store() if k is None: k = settings.retrieval_top_k return store.similarity_search(query, k=k) ``` ### 文档处理 (app/ingestion/loader.py) ```python from pathlib import Path from typing import List from langchain.schema import Document from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader def _get_loader(file_path: str): ext = Path(file_path).suffix.lower() if ext == ".pdf": return PyPDFLoader(file_path) elif ext == ".txt": return TextLoader(file_path, encoding="utf-8") elif ext == ".docx": return Docx2txtLoader(file_path) elif ext == ".md": return TextLoader(file_path, encoding="utf-8") else: raise ValueError(f"不支持的文件类型: {ext}") def load_single_document(file_path: str) -> List[Document]: loader = _get_loader(file_path) return loader.load() def load_documents_from_dir(dir_path: str = None) -> List[Document]: docs_dir = Path(dir_path or "./docs") supported_exts = {".pdf", ".txt", ".docx", ".md"} all_documents = [] for file_path in docs_dir.rglob("*"): if file_path.suffix.lower() in supported_exts: documents = load_single_document(str(file_path)) all_documents.extend(documents) return all_documents def split_documents(documents: List[Document]) -> List[Document]: splitter = RecursiveCharacterTextSplitter( chunk_size=500, chunk_overlap=100, separators=["\n\n", "\n", "。", " ", ""], ) return 
splitter.split_documents(documents) ``` ### API 路由 (app/api/routes.py) ```python from fastapi import APIRouter, UploadFile, File, HTTPException from fastapi.responses import StreamingResponse from app.ingestion.loader import load_single_document, split_documents from app.db.vector_store import add_documents, similarity_search from app.config import settings import json router = APIRouter() # 文档上传入库 @router.post("/ingest") async def ingest_document(file: UploadFile = File(...)): content = await file.read() save_path = Path(f"./docs/{file.filename}") with open(save_path, "wb") as f: f.write(content) documents = load_single_document(str(save_path)) chunks = split_documents(documents) add_documents(chunks) return {"message": "文档入库成功", "chunks": len(chunks)} # 批量入库 @router.post("/ingest-directory") async def ingest_directory(): from app.ingestion.loader import load_documents_from_dir documents = load_documents_from_dir() chunks = split_documents(documents) add_documents(chunks) return {"message": "批量入库成功", "total_chunks": len(chunks)} # 普通问答 @router.post("/query") async def query(request: QueryRequest): docs = similarity_search(request.question) # ... 
调用 LLM 生成回答 return {"answer": answer, "source_documents": sources} # 流式问答 @router.post("/query/stream") async def query_stream(request: QueryRequest): async def generate(): docs = similarity_search(request.question) source_docs = [doc.metadata.get("source") for doc in docs] context = "\n\n".join([doc.page_content for doc in docs]) # 发送来源信息 yield f"data: {json.dumps({'type': 'sources', 'data': source_docs})}\n\n" # 流式调用 LLM from openai import AsyncOpenAI client = AsyncOpenAI( api_key=settings.openai_api_key, base_url=settings.openai_base_url, ) stream = await client.chat.completions.create( model=settings.llm_model, messages=[ {"role": "system", "content": f"基于上下文回答:{context}"}, {"role": "user", "content": request.question} ], stream=True, ) async for chunk in stream: if chunk.choices[0].delta.content: content = chunk.choices[0].delta.content yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n" yield f"data: {json.dumps({'type': 'done'})}\n\n" return StreamingResponse(generate(), media_type="text/event-stream") ``` ### 应用入口 (app/main.py) ```python from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import HTMLResponse import uvicorn import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent)) from app.api.routes import router from app.config import settings app = FastAPI( title="本地知识库问答系统", description="基于 ChromaDB 向量库的本地知识库问答 API", version="1.0.0", ) app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) app.include_router(router, prefix="/api/v1") @app.get("/health") async def health_check(): return {"status": "ok"} @app.get("/", response_class=HTMLResponse) async def chat_page(): template_path = Path(__file__).parent / "templates" / "chat.html" return template_path.read_text(encoding="utf-8") if __name__ == "__main__": uvicorn.run("app.main:app", host="0.0.0.0", port=settings.server_port, 
reload=True) ``` ## API 接口 | 接口 | 方法 | 说明 | |------|------|------| | `/` | GET | 访问 Web 问答界面 | | `/health` | GET | 健康检查 | | `/api/v1/ingest` | POST | 上传单个文档入库 | | `/api/v1/ingest-directory` | POST | 批量入库 docs/ 目录 | | `/api/v1/query` | POST | 普通问答(返回完整 JSON) | | `/api/v1/query/stream` | POST | 流式问答(SSE 格式) | ## 使用流程 1. 将知识库文档放入 `docs/` 目录 2. 访问 http://localhost:8000 打开 Web 界面 3. 点击"批量入库"将文档向量化 4. 在输入框输入问题,回车发送 5. 查看流式生成的回答和来源文档 ## 注意事项 - 首次使用需确保 Ollama 已启动且 BGE-M3 模型已下载 - 如使用 OpenAI Embedding,需配置有效的 API Key - 文档分割参数(`chunk_size`、`chunk_overlap`)可在 `app/ingestion/loader.py` 中调整 - 向量检索数量可在 `.env` 中通过 `RETRIEVAL_TOP_K` 配置