From 76f7367e2fc12a433c7f39a498b78c3baf44cfb1 Mon Sep 17 00:00:00 2001
From: Sam Rolfe <samuelrolfe@gmail.com>
Date: Sat, 28 Feb 2026 14:51:37 +1100
Subject: [PATCH] Restructure: Move services from root to unified repo

Moved updated services from /home/sam/development/ root into aboutme_chat_demo/:
- knowledge_service/ (with ChromaDB, gitea_scraper, FastAPI)
- langgraph_service/ (with LangGraph agent orchestration)
- airflow/ (with DAGs for scheduled ingestion)

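All services now live in a single repository.
Per-service modular docker-compose files are maintained.
Removed duplicate nested directories.
Updated files reflect the latest working versions.

Behavioural changes also included in this patch:
- backend/main.py: /chat now forwards every query to the LangGraph
  service (LANGGRAPH_URL) instead of calling the Brain and the
  knowledge service directly; adds /health and /agents endpoints.
- airflow/docker-compose.yml: removes the airflow-triggerer and
  airflow-cli services and the _PIP_ADDITIONAL_REQUIREMENTS setting;
  mounts knowledge_service read-only at /opt/airflow/dags/repo.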
---
 airflow/dags/gitea_ingestion_dag.py  |  8 +--
 airflow/docker-compose.yml           | 33 +----------
 backend/main.py                      | 82 ++++++++++++++++------------
 docker-compose.yml                   | 17 +++++-
 knowledge_service/gitea_scraper.py   |  3 +-
 langgraph_service/Dockerfile         | 14 +++--
 langgraph_service/docker-compose.yml | 26 +++++++++
 7 files changed, 99 insertions(+), 84 deletions(-)
 create mode 100644 langgraph_service/docker-compose.yml

diff --git a/airflow/dags/gitea_ingestion_dag.py b/airflow/dags/gitea_ingestion_dag.py
index 848938d..1817fcf 100644
--- a/airflow/dags/gitea_ingestion_dag.py
+++ b/airflow/dags/gitea_ingestion_dag.py
@@ -5,10 +5,8 @@ Runs daily to fetch new/updated repos and ingest into ChromaDB.
 from datetime import datetime, timedelta
 from airflow import DAG
 from airflow.operators.python import PythonOperator
-from airflow.providers.http.operators.http import SimpleHttpOperator
 import os
 import sys
-import json
 
 # Add knowledge_service to path for imports
 sys.path.insert(0, '/opt/airflow/dags/repo')
@@ -115,30 +113,26 @@ with DAG(
     'gitea_daily_ingestion',
     default_args=default_args,
     description='Daily ingestion of Gitea repositories into knowledge base',
-    schedule_interval=timedelta(days=1),  # Run daily
+    schedule_interval=timedelta(days=1),
     start_date=datetime(2024, 1, 1),
     catchup=False,
     tags=['gitea', 'ingestion', 'knowledge'],
 ) as dag:
     
-    # Task 1: Fetch repository list
     fetch_repos_task = PythonOperator(
         task_id='fetch_repos',
         python_callable=fetch_gitea_repos,
     )
     
-    # Task 2: Fetch README content
     fetch_readmes_task = PythonOperator(
         task_id='fetch_readmes',
         python_callable=fetch_readmes,
     )
     
-    # Task 3: Ingest into ChromaDB
     ingest_task = PythonOperator(
         task_id='ingest_to_chroma',
         python_callable=ingest_to_chroma,
     )
     
-    # Define task dependencies
     fetch_repos_task >> fetch_readmes_task >> ingest_task
 
diff --git a/airflow/docker-compose.yml b/airflow/docker-compose.yml
index 169bdc2..7a3ea8a 100644
--- a/airflow/docker-compose.yml
+++ b/airflow/docker-compose.yml
@@ -14,12 +14,12 @@ x-airflow-common:
     AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
     AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session'
     AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
-    _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
   volumes:
     - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
     - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
     - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
     - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
+    - ${AIRFLOW_PROJ_DIR:-.}/../knowledge_service:/opt/airflow/dags/repo:ro
   user: "${AIRFLOW_UID:-50000}:0"
   depends_on:
     &airflow-common-depends-on
@@ -115,23 +115,6 @@ services:
     networks:
       - ai-mesh
 
-  airflow-triggerer:
-    <<: *airflow-common
-    command: triggerer
-    healthcheck:
-      test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
-      interval: 30s
-      timeout: 10s
-      retries: 5
-      start_period: 30s
-    restart: always
-    depends_on:
-      <<: *airflow-common-depends-on
-      airflow-init:
-        condition: service_completed_successfully
-    networks:
-      - ai-mesh
-
   airflow-init:
     <<: *airflow-common
     entrypoint: /bin/bash
@@ -158,20 +141,6 @@ services:
     networks:
       - ai-mesh
 
-  airflow-cli:
-    <<: *airflow-common
-    profiles:
-      - debug
-    environment:
-      <<: *airflow-common-env
-      CONNECTION_CHECK_MAX_COUNT: "0"
-    command:
-      - bash
-      - -c
-      - airflow
-    networks:
-      - ai-mesh
-
 volumes:
   postgres-db-volume:
 
diff --git a/backend/main.py b/backend/main.py
index a02c2ec..2882518 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -5,6 +5,7 @@ import httpx
 import logging
 import sys
 import traceback
+import os
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[logging.StreamHandler(sys.stdout)])
 logger = logging.getLogger(__name__)
@@ -15,44 +16,53 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
 class MessageRequest(BaseModel):
     message: str
 
-BRAIN_URL = "http://opencode-brain:5000"
-KNOWLEDGE_URL = "http://knowledge-service:8080/query"
-AUTH = httpx.BasicAuth("opencode", "sam4jo")
+LANGGRAPH_URL = os.getenv("LANGGRAPH_URL", "http://langgraph-service:8090")
 
 @app.post("/chat")
 async def chat(request: MessageRequest):
-    user_msg = request.message.lower()
-    timeout_long = httpx.Timeout(180.0, connect=10.0)
-    timeout_short = httpx.Timeout(5.0, connect=2.0)
+    """
+    Routes all queries through LangGraph Supervisor.
+    No hardcoded keywords - LangGraph intelligently routes to:
+    - Librarian: For knowledge base queries (RAG)
+    - Opencode: For coding tasks
+    - Brain: For general LLM queries
+    """
+    logger.info(f"Gateway: Routing query to LangGraph: {request.message}")
     
-    context = ""
-    # Check for keywords to trigger Librarian (DB) lookup
-    if any(kw in user_msg for kw in ["sam", "hobby", "music", "guitar", "skiing", "experience"]):
-        logger.info("Gateway: Consulting Librarian (DB)...")
-        async with httpx.AsyncClient(timeout=timeout_short) as client:
-            try:
-                k_res = await client.post(KNOWLEDGE_URL, json={"question": request.message})
-                if k_res.status_code == 200:
-                    context = k_res.json().get("context", "")
-            except Exception as e:
-                logger.warning(f"Gateway: Librarian offline/slow: {str(e)}")
+    try:
+        async with httpx.AsyncClient(timeout=httpx.Timeout(60.0, connect=10.0)) as client:
+            response = await client.post(
+                f"{LANGGRAPH_URL}/query",
+                json={"query": request.message}
+            )
+            
+            if response.status_code == 200:
+                result = response.json()
+                agent_used = result.get("agent_used", "unknown")
+                logger.info(f"Gateway: Response from {agent_used} agent")
+                return {"response": result["response"]}
+            else:
+                logger.error(f"Gateway: LangGraph error {response.status_code}")
+                return {"response": "Error: Orchestration service unavailable"}
+                
+    except Exception as e:
+        logger.error(f"Gateway: Error routing through LangGraph: {traceback.format_exc()}")
+        return {"response": "Error: Unable to process your request at this time."}
+
+@app.get("/health")
+async def health():
+    return {"status": "healthy", "service": "chat-gateway"}
+
+@app.get("/agents")
+async def list_agents():
+    """List available agents from LangGraph."""
+    try:
+        async with httpx.AsyncClient(timeout=httpx.Timeout(10.0)) as client:
+            response = await client.get(f"{LANGGRAPH_URL}/agents")
+            if response.status_code == 200:
+                return response.json()
+    except Exception as e:
+        logger.error(f"Error fetching agents: {e}")
+    
+    return {"agents": [], "error": "Could not retrieve agent list"}
 
-    # Forward to Brain (LLM)
-    async with httpx.AsyncClient(auth=AUTH, timeout=timeout_long) as brain_client:
-        try:
-            session_res = await brain_client.post(f"{BRAIN_URL}/session", json={"title": "Demo"})
-            session_id = session_res.json()["id"]
-            final_prompt = f"CONTEXT:\n{context}\n\nUSER: {request.message}" if context else request.message
-            response = await brain_client.post(f"{BRAIN_URL}/session/{session_id}/message", json={"parts": [{"type": "text", "text": final_prompt}]})
-            
-            # FIX: Iterate through parts array to find text response
-            data = response.json()
-            if "parts" in data:
-                for part in data["parts"]:
-                    if part.get("type") == "text" and "text" in part:
-                        return {"response": part["text"]}
-            
-            return {"response": "AI responded but no text found in expected format."}
-        except Exception:
-            logger.error(f"Gateway: Brain failure: {traceback.format_exc()}")
-            return {"response": "Error: The Brain is taking too long or is disconnected."}
diff --git a/docker-compose.yml b/docker-compose.yml
index 9ac5c8c..9ec21d0 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,7 +3,7 @@ services:
     image: postgres:15-alpine
     environment:
       POSTGRES_USER: sam
-      POSTGRES_PASSWORD: sam4jo 
+      POSTGRES_PASSWORD: sam4jo
       POSTGRES_DB: chat_demo
     ports:
       - "5432:5432"
@@ -11,18 +11,24 @@ services:
       - postgres_data:/var/lib/postgresql/data
     networks:
       - ai-mesh
+    restart: unless-stopped
+
   backend:
     build: ./backend
     ports:
       - "8000:8000"
     environment:
       DATABASE_URL: postgresql://sam:sam4jo@db:5432/chat_demo
+      LANGGRAPH_URL: http://langgraph-service:8090
     volumes:
       - ./backend:/app
     depends_on:
       - db
+      # langgraph-service runs from langgraph_service/docker-compose.yml; reached over ai-mesh
     networks:
       - ai-mesh
+    restart: unless-stopped
+
   frontend:
     build: ./frontend
     ports:
@@ -32,10 +38,15 @@ services:
       - /app/node_modules
     environment:
       - CHOKIDAR_USEPOLLING=true
+    depends_on:
+      - backend
     networks:
       - ai-mesh
+
 volumes:
   postgres_data:
+
 networks:
-      ai-mesh:
-        external: true
+  ai-mesh:
+    external: true
+
diff --git a/knowledge_service/gitea_scraper.py b/knowledge_service/gitea_scraper.py
index dd0fda6..7733a31 100644
--- a/knowledge_service/gitea_scraper.py
+++ b/knowledge_service/gitea_scraper.py
@@ -61,7 +61,6 @@ class GiteaScraper:
     
     def get_readme(self, repo_name: str) -> str:
         """Fetch README content for a repository."""
-        # Try common README filenames
         readme_names = ["README.md", "readme.md", "Readme.md", "README.rst"]
         
         for readme_name in readme_names:
@@ -113,7 +112,7 @@ if __name__ == "__main__":
     repos = scraper.get_user_repos()
     print(f"Found {len(repos)} repositories")
     
-    for repo in repos[:3]:  # Test with first 3
+    for repo in repos[:3]:
         print(f"\nRepo: {repo.name}")
         readme = scraper.get_readme(repo.name)
         if readme:
diff --git a/langgraph_service/Dockerfile b/langgraph_service/Dockerfile
index eccd9d6..2c0a228 100644
--- a/langgraph_service/Dockerfile
+++ b/langgraph_service/Dockerfile
@@ -6,16 +6,22 @@ RUN apt-get update && apt-get install -y \
     g++ \
     && rm -rf /var/lib/apt/lists/*
 
-# Create app directory
+# Create directories
+RUN mkdir -p /app/packages /app/code
+
 WORKDIR /app
 
-# Copy requirements
+# Install packages to isolated directory
 COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir --target=/app/packages -r requirements.txt
 
 # Copy code
-COPY . .
+COPY . /app/code/
 
+ENV PYTHONPATH=/app/packages
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /app/code
 EXPOSE 8090
 
 CMD ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8090"]
diff --git a/langgraph_service/docker-compose.yml b/langgraph_service/docker-compose.yml
new file mode 100644
index 0000000..0869c79
--- /dev/null
+++ b/langgraph_service/docker-compose.yml
@@ -0,0 +1,26 @@
+version: '3.8'
+
+services:
+  langgraph-service:
+    build: .
+    image: langgraph-service:latest
+    container_name: langgraph-service
+    ports:
+      - "8090:8090"
+    volumes:
+      # Only mount code files, not packages
+      - ./main.py:/app/code/main.py:ro
+      - ./supervisor_agent.py:/app/code/supervisor_agent.py:ro
+    environment:
+      - PYTHONUNBUFFERED=1
+      - PYTHONPATH=/app/packages
+      - OPENCODE_PASSWORD=${OPENCODE_PASSWORD:-sam4jo}
+      - KNOWLEDGE_SERVICE_URL=http://knowledge-service:8080
+    networks:
+      - ai-mesh
+    restart: unless-stopped
+
+networks:
+  ai-mesh:
+    external: true
+