Initial commit: Multi-service AI agent system
- Frontend: Vite + React + TypeScript chat interface - Backend: FastAPI gateway with LangGraph routing - Knowledge Service: ChromaDB RAG with Gitea scraper - LangGraph Service: Multi-agent orchestration - Airflow: Scheduled Gitea ingestion DAG - Documentation: Complete plan and implementation guides Architecture: - Modular Docker Compose per service - External ai-mesh network for communication - Fast rebuilds with /app/packages pattern - Intelligent agent routing (no hardcoded keywords) Services: - Frontend (5173): React chat UI - Chat Gateway (8000): FastAPI entry point - LangGraph (8090): Agent orchestration - Knowledge (8080): ChromaDB RAG - Airflow (8081): Scheduled ingestion - PostgreSQL (5432): Chat history Excludes: node_modules, .venv, chroma_db, logs, .env files Includes: All source code, configs, docs, docker files
This commit is contained in:
52
knowledge_service/main.py
Normal file
52
knowledge_service/main.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from fastapi import FastAPI
|
||||
from pydantic import BaseModel
|
||||
from langchain_community.document_loaders import TextLoader
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from langchain_community.vectorstores import Chroma
|
||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||
import os
|
||||
import logging
|
||||
import sys
|
||||
|
||||
# Log to stdout so the container runtime (Docker Compose per the commit notes)
# captures service logs.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
logger = logging.getLogger(__name__)

app = FastAPI()

# Populated by the startup hook once the document store is indexed;
# stays None (and /query returns empty context) if indexing fails.
vector_db = None

# OpenAI text-embedding-3-small, routed through the OpenRouter API.
# NOTE(review): a previous comment said "Voyage-2", but the configured model
# below is OpenAI's — confirm which embedding model is actually intended.
embeddings = OpenAIEmbeddings(
    model="openai/text-embedding-3-small",
    openai_api_base="https://openrouter.ai/api/v1",
    openai_api_key=os.getenv("OPENROUTER_API_KEY")
)
|
||||
|
||||
# NOTE(review): @app.on_event is deprecated in recent FastAPI releases;
# migrate to a lifespan handler when the FastAPI version allows.
@app.on_event("startup")
async def startup_event():
    """Index ./data/hobbies.md into a persistent Chroma store at startup.

    Best-effort: any indexing failure is logged (with traceback) and the
    service still starts; ``vector_db`` then remains None and /query
    responds with empty context.
    """
    global vector_db
    data_path = "./data/hobbies.md"

    # Guard clause: nothing to index without the data file.
    if not os.path.exists(data_path):
        logger.warning("Librarian: Missing data file at %s", data_path)
        return

    try:
        documents = TextLoader(data_path).load()
        # 500-char chunks with 50-char overlap keep retrieval granular while
        # preserving context across chunk boundaries.
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.split_documents(documents)
        vector_db = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_directory="./chroma_db",
        )
        logger.info("Librarian: ChromaDB is loaded with openAi embeddings.")
    except Exception as e:
        # Deliberately broad so a bad index never crashes service startup,
        # but use logger.exception to keep the full traceback instead of
        # only str(e).
        logger.exception("Librarian: DB error: %s", e)
|
||||
|
||||
@app.get("/health")
async def health():
    """Liveness/readiness probe.

    Reports whether the vector store finished indexing at startup.
    """
    index_ready = vector_db is not None
    return {"status": "ready", "vectors_loaded": index_ready}
|
||||
|
||||
class QueryRequest(BaseModel):
    """Request body for POST /query."""

    # Natural-language question used for similarity search over the store.
    question: str
|
||||
|
||||
@app.post("/query")
async def query_knowledge(request: QueryRequest):
    """Retrieve the two most similar chunks and join them as one context string.

    Returns ``{"context": ""}`` when the vector store was never loaded.
    """
    if not vector_db:
        return {"context": ""}
    hits = vector_db.similarity_search(request.question, k=2)
    joined = "\n".join(hit.page_content for hit in hits)
    return {"context": joined}
|
||||
Reference in New Issue
Block a user