Restructure: Move services from root to unified repo

Moved updated services from /home/sam/development/ root into aboutme_chat_demo/:
- knowledge_service/ (with ChromaDB, gitea_scraper, FastAPI)
- langgraph_service/ (with LangGraph agent orchestration)
- airflow/ (with DAGs for scheduled ingestion)

All services now in single repo location.
Modular docker-compose files per service maintained.
Removed duplicate nested directories.
Updated files reflect latest working versions.
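For orientation, the resulting layout is roughly as follows (a sketch; anything not named in the diffs below is an assumption):

aboutme_chat_demo/
├── backend/              # FastAPI chat gateway
├── frontend/
├── knowledge_service/    # ChromaDB + gitea_scraper + FastAPI query API
├── langgraph_service/    # main.py, supervisor_agent.py, Dockerfile, docker-compose.yml
├── airflow/              # DAGs for scheduled ingestion + docker-compose.yml
└── docker-compose.yml    # db, backend, frontend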
2026-02-28 14:51:37 +11:00
parent 628ba96998
commit 76f7367e2f
7 changed files with 99 additions and 84 deletions

View File

@@ -5,10 +5,8 @@ Runs daily to fetch new/updated repos and ingest into ChromaDB.
from datetime import datetime, timedelta
from airflow import DAG
from airflow.operators.python import PythonOperator
-from airflow.providers.http.operators.http import SimpleHttpOperator
import os
import sys
-import json
# Add knowledge_service to path for imports
sys.path.insert(0, '/opt/airflow/dags/repo')
@@ -115,30 +113,26 @@ with DAG(
'gitea_daily_ingestion',
default_args=default_args,
description='Daily ingestion of Gitea repositories into knowledge base',
-schedule_interval=timedelta(days=1), # Run daily
+schedule_interval=timedelta(days=1),
start_date=datetime(2024, 1, 1),
catchup=False,
tags=['gitea', 'ingestion', 'knowledge'],
) as dag:
# Task 1: Fetch repository list
fetch_repos_task = PythonOperator(
task_id='fetch_repos',
python_callable=fetch_gitea_repos,
)
# Task 2: Fetch README content
fetch_readmes_task = PythonOperator(
task_id='fetch_readmes',
python_callable=fetch_readmes,
)
# Task 3: Ingest into ChromaDB
ingest_task = PythonOperator(
task_id='ingest_to_chroma',
python_callable=ingest_to_chroma,
)
# Define task dependencies
fetch_repos_task >> fetch_readmes_task >> ingest_task
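The python_callables wired into these tasks are not part of this hunk. A rough sketch of the shape they could take (the XCom hand-off and the no-argument GiteaScraper constructor are assumptions; only the task wiring above comes from the source):

def fetch_gitea_repos(ti, **kwargs):
    from gitea_scraper import GiteaScraper  # importable via the sys.path insert above
    repos = GiteaScraper().get_user_repos()
    ti.xcom_push(key="repos", value=[repo.name for repo in repos])

def fetch_readmes(ti, **kwargs):
    from gitea_scraper import GiteaScraper
    names = ti.xcom_pull(task_ids="fetch_repos", key="repos")
    scraper = GiteaScraper()
    ti.xcom_push(key="readmes", value={n: scraper.get_readme(n) for n in names})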

View File

@@ -14,12 +14,12 @@ x-airflow-common:
AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session'
AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
_PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
volumes:
- ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
- ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
- ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
- ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
- /home/sam/development/knowledge_service:/opt/airflow/dags/repo:ro
user: "${AIRFLOW_UID:-50000}:0"
depends_on:
&airflow-common-depends-on
@@ -115,23 +115,6 @@ services:
networks:
- ai-mesh
-airflow-triggerer:
-<<: *airflow-common
-command: triggerer
-healthcheck:
-test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
-interval: 30s
-timeout: 10s
-retries: 5
-start_period: 30s
-restart: always
-depends_on:
-<<: *airflow-common-depends-on
-airflow-init:
-condition: service_completed_successfully
-networks:
-- ai-mesh
airflow-init:
<<: *airflow-common
entrypoint: /bin/bash
@@ -158,20 +141,6 @@ services:
networks:
- ai-mesh
-airflow-cli:
-<<: *airflow-common
-profiles:
-- debug
-environment:
-<<: *airflow-common-env
-CONNECTION_CHECK_MAX_COUNT: "0"
-command:
-- bash
-- -c
-- airflow
-networks:
-- ai-mesh
volumes:
postgres-db-volume:
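The read-only knowledge_service mount above is what the DAG's sys.path insert relies on: inside the Airflow containers the scraper code appears under /opt/airflow/dags/repo. A minimal sketch, assuming gitea_scraper.py sits at the top of the mounted directory:

import sys
sys.path.insert(0, "/opt/airflow/dags/repo")  # target of the :ro bind mount above
from gitea_scraper import GiteaScraper        # knowledge_service module, now importable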

View File

@@ -5,6 +5,7 @@ import httpx
import logging
import sys
import traceback
+import os
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[logging.StreamHandler(sys.stdout)])
logger = logging.getLogger(__name__)
@@ -15,44 +16,53 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
class MessageRequest(BaseModel):
message: str
BRAIN_URL = "http://opencode-brain:5000"
KNOWLEDGE_URL = "http://knowledge-service:8080/query"
AUTH = httpx.BasicAuth("opencode", "sam4jo")
+LANGGRAPH_URL = os.getenv("LANGGRAPH_URL", "http://langgraph-service:8090")
@app.post("/chat")
async def chat(request: MessageRequest):
-user_msg = request.message.lower()
-timeout_long = httpx.Timeout(180.0, connect=10.0)
-timeout_short = httpx.Timeout(5.0, connect=2.0)
+"""
+Routes all queries through LangGraph Supervisor.
+No hardcoded keywords - LangGraph intelligently routes to:
+- Librarian: For knowledge base queries (RAG)
+- Opencode: For coding tasks
+- Brain: For general LLM queries
+"""
+logger.info(f"Gateway: Routing query to LangGraph: {request.message}")
-context = ""
-# Check for keywords to trigger Librarian (DB) lookup
-if any(kw in user_msg for kw in ["sam", "hobby", "music", "guitar", "skiing", "experience"]):
-logger.info("Gateway: Consulting Librarian (DB)...")
-async with httpx.AsyncClient(timeout=timeout_short) as client:
-try:
-k_res = await client.post(KNOWLEDGE_URL, json={"question": request.message})
-if k_res.status_code == 200:
-context = k_res.json().get("context", "")
-except Exception as e:
-logger.warning(f"Gateway: Librarian offline/slow: {str(e)}")
+try:
+async with httpx.AsyncClient(timeout=httpx.Timeout(60.0, connect=10.0)) as client:
+response = await client.post(
+f"{LANGGRAPH_URL}/query",
+json={"query": request.message}
+)
+if response.status_code == 200:
+result = response.json()
+agent_used = result.get("agent_used", "unknown")
+logger.info(f"Gateway: Response from {agent_used} agent")
+return {"response": result["response"]}
+else:
+logger.error(f"Gateway: LangGraph error {response.status_code}")
+return {"response": "Error: Orchestration service unavailable"}
+except Exception as e:
+logger.error(f"Gateway: Error routing through LangGraph: {traceback.format_exc()}")
+return {"response": "Error: Unable to process your request at this time."}
@app.get("/health")
async def health():
return {"status": "healthy", "service": "chat-gateway"}
@app.get("/agents")
async def list_agents():
"""List available agents from LangGraph."""
try:
async with httpx.AsyncClient(timeout=httpx.Timeout(10.0)) as client:
response = await client.get(f"{LANGGRAPH_URL}/agents")
if response.status_code == 200:
return response.json()
except Exception as e:
logger.error(f"Error fetching agents: {e}")
return {"agents": [], "error": "Could not retrieve agent list"}
# Forward to Brain (LLM)
async with httpx.AsyncClient(auth=AUTH, timeout=timeout_long) as brain_client:
try:
session_res = await brain_client.post(f"{BRAIN_URL}/session", json={"title": "Demo"})
session_id = session_res.json()["id"]
final_prompt = f"CONTEXT:\n{context}\n\nUSER: {request.message}" if context else request.message
response = await brain_client.post(f"{BRAIN_URL}/session/{session_id}/message", json={"parts": [{"type": "text", "text": final_prompt}]})
# FIX: Iterate through parts array to find text response
data = response.json()
if "parts" in data:
for part in data["parts"]:
if part.get("type") == "text" and "text" in part:
return {"response": part["text"]}
return {"response": "AI responded but no text found in expected format."}
except Exception:
logger.error(f"Gateway: Brain failure: {traceback.format_exc()}")
return {"response": "Error: The Brain is taking too long or is disconnected."}

View File

@@ -3,7 +3,7 @@ services:
image: postgres:15-alpine
environment:
POSTGRES_USER: sam
-POSTGRES_PASSWORD: sam4jo
+POSTGRES_PASSWORD: sam4jo
POSTGRES_DB: chat_demo
ports:
- "5432:5432"
@@ -11,18 +11,24 @@ services:
- postgres_data:/var/lib/postgresql/data
networks:
- ai-mesh
+restart: unless-stopped
backend:
build: ./backend
ports:
- "8000:8000"
environment:
DATABASE_URL: postgresql://sam:sam4jo@db:5432/chat_demo
+LANGGRAPH_URL: http://langgraph-service:8090
volumes:
- ./backend:/app
depends_on:
- db
+- langgraph-service
networks:
- ai-mesh
+restart: unless-stopped
frontend:
build: ./frontend
ports:
@@ -32,10 +38,15 @@ services:
- /app/node_modules
environment:
- CHOKIDAR_USEPOLLING=true
+depends_on:
+- backend
+networks:
+- ai-mesh
volumes:
postgres_data:
networks:
-ai-mesh:
-external: true
+ai-mesh:
+external: true

View File

@@ -61,7 +61,6 @@ class GiteaScraper:
def get_readme(self, repo_name: str) -> str:
"""Fetch README content for a repository."""
-# Try common README filenames
readme_names = ["README.md", "readme.md", "Readme.md", "README.rst"]
for readme_name in readme_names:
@@ -113,7 +112,7 @@ if __name__ == "__main__":
repos = scraper.get_user_repos()
print(f"Found {len(repos)} repositories")
-for repo in repos[:3]: # Test with first 3
+for repo in repos[:3]:
print(f"\nRepo: {repo.name}")
readme = scraper.get_readme(repo.name)
if readme:
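The body of the loop in get_readme is elided above. One plausible shape for it, using Gitea's raw-content endpoint (the URL layout and the self.base_url, self.user, and self.headers attributes are illustrative assumptions, not the file's confirmed internals):

import requests

def get_readme(self, repo_name: str) -> str:
    """Fetch README content for a repository (illustrative sketch)."""
    readme_names = ["README.md", "readme.md", "Readme.md", "README.rst"]
    for readme_name in readme_names:
        # assumed Gitea raw endpoint: /api/v1/repos/{owner}/{repo}/raw/{path}
        url = f"{self.base_url}/api/v1/repos/{self.user}/{repo_name}/raw/{readme_name}"
        resp = requests.get(url, headers=self.headers, timeout=10)
        if resp.status_code == 200:
            return resp.text
    return ""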

View File

@@ -6,16 +6,22 @@ RUN apt-get update && apt-get install -y \
g++ \
&& rm -rf /var/lib/apt/lists/*
-# Create app directory
+# Create directories
+RUN mkdir -p /app/packages /app/code
WORKDIR /app
-# Copy requirements
+# Install packages to isolated directory
COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --target=/app/packages -r requirements.txt
# Copy code
-COPY . .
+COPY . /app/code/
+ENV PYTHONPATH=/app/packages
ENV PYTHONUNBUFFERED=1
+WORKDIR /app/code
EXPOSE 8090
CMD ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8090"]

View File

@@ -0,0 +1,26 @@
version: '3.8'
services:
langgraph-service:
build: .
image: langgraph-service:latest
container_name: langgraph-service
ports:
- "8090:8090"
volumes:
# Only mount code files, not packages
- ./main.py:/app/code/main.py:ro
- ./supervisor_agent.py:/app/code/supervisor_agent.py:ro
environment:
- PYTHONUNBUFFERED=1
- PYTHONPATH=/app/packages
- OPENCODE_PASSWORD=${OPENCODE_PASSWORD:-sam4jo}
- KNOWLEDGE_SERVICE_URL=http://knowledge-service:8080
networks:
- ai-mesh
restart: unless-stopped
networks:
ai-mesh:
external: true