Initial commit: Multi-service AI agent system
- Frontend: Vite + React + TypeScript chat interface - Backend: FastAPI gateway with LangGraph routing - Knowledge Service: ChromaDB RAG with Gitea scraper - LangGraph Service: Multi-agent orchestration - Airflow: Scheduled Gitea ingestion DAG - Documentation: Complete plan and implementation guides Architecture: - Modular Docker Compose per service - External ai-mesh network for communication - Fast rebuilds with /app/packages pattern - Intelligent agent routing (no hardcoded keywords) Services: - Frontend (5173): React chat UI - Chat Gateway (8000): FastAPI entry point - LangGraph (8090): Agent orchestration - Knowledge (8080): ChromaDB RAG - Airflow (8081): Scheduled ingestion - PostgreSQL (5432): Chat history Excludes: node_modules, .venv, chroma_db, logs, .env files Includes: All source code, configs, docs, docker files
This commit is contained in:
52
knowledge_service/main.py
Normal file
52
knowledge_service/main.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from fastapi import FastAPI
|
||||
from pydantic import BaseModel
|
||||
from langchain_community.document_loaders import TextLoader
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from langchain_community.vectorstores import Chroma
|
||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||
import os
|
||||
import logging
|
||||
import sys
|
||||
|
||||
# Log to stdout so the container runtime (Docker Compose per the commit notes)
# captures service logs.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
logger = logging.getLogger(__name__)

app = FastAPI()

# Populated by the startup hook once the document store is indexed;
# stays None (and /query returns empty context) if indexing fails.
vector_db = None

# OpenAI text-embedding-3-small, routed through the OpenRouter API.
# NOTE(review): a previous comment said "Voyage-2", but the configured model
# below is OpenAI's — confirm which embedding model is actually intended.
embeddings = OpenAIEmbeddings(
    model="openai/text-embedding-3-small",
    openai_api_base="https://openrouter.ai/api/v1",
    openai_api_key=os.getenv("OPENROUTER_API_KEY")
)
|
||||
|
||||
# NOTE(review): @app.on_event is deprecated in recent FastAPI releases;
# migrate to a lifespan handler when the FastAPI version allows.
@app.on_event("startup")
async def startup_event():
    """Index ./data/hobbies.md into a persistent Chroma store at startup.

    Best-effort: any indexing failure is logged (with traceback) and the
    service still starts; ``vector_db`` then remains None and /query
    responds with empty context.
    """
    global vector_db
    data_path = "./data/hobbies.md"

    # Guard clause: nothing to index without the data file.
    if not os.path.exists(data_path):
        logger.warning("Librarian: Missing data file at %s", data_path)
        return

    try:
        documents = TextLoader(data_path).load()
        # 500-char chunks with 50-char overlap keep retrieval granular while
        # preserving context across chunk boundaries.
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.split_documents(documents)
        vector_db = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_directory="./chroma_db",
        )
        logger.info("Librarian: ChromaDB is loaded with openAi embeddings.")
    except Exception as e:
        # Deliberately broad so a bad index never crashes service startup,
        # but use logger.exception to keep the full traceback instead of
        # only str(e).
        logger.exception("Librarian: DB error: %s", e)
|
||||
|
||||
@app.get("/health")
async def health():
    """Liveness/readiness probe.

    Reports whether the vector store finished indexing at startup.
    """
    index_ready = vector_db is not None
    return {"status": "ready", "vectors_loaded": index_ready}
|
||||
|
||||
class QueryRequest(BaseModel):
    """Request body for POST /query."""

    # Natural-language question used for similarity search over the store.
    question: str
|
||||
|
||||
@app.post("/query")
async def query_knowledge(request: QueryRequest):
    """Retrieve the two most similar chunks and join them as one context string.

    Returns ``{"context": ""}`` when the vector store was never loaded.
    """
    if not vector_db:
        return {"context": ""}
    hits = vector_db.similarity_search(request.question, k=2)
    joined = "\n".join(hit.page_content for hit in hits)
    return {"context": joined}
|
||||
Reference in New Issue
Block a user