From 76f7367e2fc12a433c7f39a498b78c3baf44cfb1 Mon Sep 17 00:00:00 2001 From: Sam Rolfe Date: Sat, 28 Feb 2026 14:51:37 +1100 Subject: [PATCH] Restructure: Move services from root to unified repo Moved updated services from /home/sam/development/ root into aboutme_chat_demo/: - knowledge_service/ (with ChromaDB, gitea_scraper, FastAPI) - langgraph_service/ (with LangGraph agent orchestration) - airflow/ (with DAGs for scheduled ingestion) All services now in single repo location. Modular docker-compose files per service maintained. Removed duplicate nested directories. Updated files reflect latest working versions. --- airflow/dags/gitea_ingestion_dag.py | 8 +-- airflow/docker-compose.yml | 33 +---------- backend/main.py | 82 ++++++++++++++++------------ docker-compose.yml | 17 +++++- knowledge_service/gitea_scraper.py | 3 +- langgraph_service/Dockerfile | 14 +++-- langgraph_service/docker-compose.yml | 26 +++++++++ 7 files changed, 99 insertions(+), 84 deletions(-) create mode 100644 langgraph_service/docker-compose.yml diff --git a/airflow/dags/gitea_ingestion_dag.py b/airflow/dags/gitea_ingestion_dag.py index 848938d..1817fcf 100644 --- a/airflow/dags/gitea_ingestion_dag.py +++ b/airflow/dags/gitea_ingestion_dag.py @@ -5,10 +5,8 @@ Runs daily to fetch new/updated repos and ingest into ChromaDB. from datetime import datetime, timedelta from airflow import DAG from airflow.operators.python import PythonOperator -from airflow.providers.http.operators.http import SimpleHttpOperator import os import sys -import json # Add knowledge_service to path for imports sys.path.insert(0, '/opt/airflow/dags/repo') @@ -115,30 +113,26 @@ with DAG( 'gitea_daily_ingestion', default_args=default_args, description='Daily ingestion of Gitea repositories into knowledge base', - schedule_interval=timedelta(days=1), # Run daily + schedule_interval=timedelta(days=1), start_date=datetime(2024, 1, 1), catchup=False, tags=['gitea', 'ingestion', 'knowledge'], ) as dag: - # Task 1: Fetch repository list fetch_repos_task = PythonOperator( task_id='fetch_repos', python_callable=fetch_gitea_repos, ) - # Task 2: Fetch README content fetch_readmes_task = PythonOperator( task_id='fetch_readmes', python_callable=fetch_readmes, ) - # Task 3: Ingest into ChromaDB ingest_task = PythonOperator( task_id='ingest_to_chroma', python_callable=ingest_to_chroma, ) - # Define task dependencies fetch_repos_task >> fetch_readmes_task >> ingest_task diff --git a/airflow/docker-compose.yml b/airflow/docker-compose.yml index 169bdc2..7a3ea8a 100644 --- a/airflow/docker-compose.yml +++ b/airflow/docker-compose.yml @@ -14,12 +14,12 @@ x-airflow-common: AIRFLOW__CORE__LOAD_EXAMPLES: 'false' AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session' AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true' - _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} volumes: - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins + - /home/sam/development/knowledge_service:/opt/airflow/dags/repo:ro user: "${AIRFLOW_UID:-50000}:0" depends_on: &airflow-common-depends-on @@ -115,23 +115,6 @@ services: networks: - ai-mesh - airflow-triggerer: - <<: *airflow-common - command: triggerer - healthcheck: - test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"'] - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - networks: - - ai-mesh - airflow-init: <<: *airflow-common entrypoint: /bin/bash @@ -158,20 +141,6 @@ services: networks: - ai-mesh - airflow-cli: - <<: *airflow-common - profiles: - - debug - environment: - <<: *airflow-common-env - CONNECTION_CHECK_MAX_COUNT: "0" - command: - - bash - - -c - - airflow - networks: - - ai-mesh - volumes: postgres-db-volume: diff --git a/backend/main.py b/backend/main.py index a02c2ec..2882518 100644 --- a/backend/main.py +++ b/backend/main.py @@ -5,6 +5,7 @@ import httpx import logging import sys import traceback +import os logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[logging.StreamHandler(sys.stdout)]) logger = logging.getLogger(__name__) @@ -15,44 +16,53 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, class MessageRequest(BaseModel): message: str -BRAIN_URL = "http://opencode-brain:5000" -KNOWLEDGE_URL = "http://knowledge-service:8080/query" -AUTH = httpx.BasicAuth("opencode", "sam4jo") +LANGGRAPH_URL = os.getenv("LANGGRAPH_URL", "http://langgraph-service:8090") @app.post("/chat") async def chat(request: MessageRequest): - user_msg = request.message.lower() - timeout_long = httpx.Timeout(180.0, connect=10.0) - timeout_short = httpx.Timeout(5.0, connect=2.0) + """ + Routes all queries through LangGraph Supervisor. + No hardcoded keywords - LangGraph intelligently routes to: + - Librarian: For knowledge base queries (RAG) + - Opencode: For coding tasks + - Brain: For general LLM queries + """ + logger.info(f"Gateway: Routing query to LangGraph: {request.message}") - context = "" - # Check for keywords to trigger Librarian (DB) lookup - if any(kw in user_msg for kw in ["sam", "hobby", "music", "guitar", "skiing", "experience"]): - logger.info("Gateway: Consulting Librarian (DB)...") - async with httpx.AsyncClient(timeout=timeout_short) as client: - try: - k_res = await client.post(KNOWLEDGE_URL, json={"question": request.message}) - if k_res.status_code == 200: - context = k_res.json().get("context", "") - except Exception as e: - logger.warning(f"Gateway: Librarian offline/slow: {str(e)}") + try: + async with httpx.AsyncClient(timeout=httpx.Timeout(60.0, connect=10.0)) as client: + response = await client.post( + f"{LANGGRAPH_URL}/query", + json={"query": request.message} + ) + + if response.status_code == 200: + result = response.json() + agent_used = result.get("agent_used", "unknown") + logger.info(f"Gateway: Response from {agent_used} agent") + return {"response": result["response"]} + else: + logger.error(f"Gateway: LangGraph error {response.status_code}") + return {"response": "Error: Orchestration service unavailable"} + + except Exception as e: + logger.error(f"Gateway: Error routing through LangGraph: {traceback.format_exc()}") + return {"response": "Error: Unable to process your request at this time."} + +@app.get("/health") +async def health(): + return {"status": "healthy", "service": "chat-gateway"} + +@app.get("/agents") +async def list_agents(): + """List available agents from LangGraph.""" + try: + async with httpx.AsyncClient(timeout=httpx.Timeout(10.0)) as client: + response = await client.get(f"{LANGGRAPH_URL}/agents") + if response.status_code == 200: + return response.json() + except Exception as e: + logger.error(f"Error fetching agents: {e}") + + return {"agents": [], "error": "Could not retrieve agent list"} - # Forward to Brain (LLM) - async with httpx.AsyncClient(auth=AUTH, timeout=timeout_long) as brain_client: - try: - session_res = await brain_client.post(f"{BRAIN_URL}/session", json={"title": "Demo"}) - session_id = session_res.json()["id"] - final_prompt = f"CONTEXT:\n{context}\n\nUSER: {request.message}" if context else request.message - response = await brain_client.post(f"{BRAIN_URL}/session/{session_id}/message", json={"parts": [{"type": "text", "text": final_prompt}]}) - - # FIX: Iterate through parts array to find text response - data = response.json() - if "parts" in data: - for part in data["parts"]: - if part.get("type") == "text" and "text" in part: - return {"response": part["text"]} - - return {"response": "AI responded but no text found in expected format."} - except Exception: - logger.error(f"Gateway: Brain failure: {traceback.format_exc()}") - return {"response": "Error: The Brain is taking too long or is disconnected."} diff --git a/docker-compose.yml b/docker-compose.yml index 9ac5c8c..9ec21d0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ services: image: postgres:15-alpine environment: POSTGRES_USER: sam - POSTGRES_PASSWORD: sam4jo + POSTGRES_PASSWORD: sam4jo POSTGRES_DB: chat_demo ports: - "5432:5432" @@ -11,18 +11,24 @@ services: - postgres_data:/var/lib/postgresql/data networks: - ai-mesh + restart: unless-stopped + backend: build: ./backend ports: - "8000:8000" environment: DATABASE_URL: postgresql://sam:sam4jo@db:5432/chat_demo + LANGGRAPH_URL: http://langgraph-service:8090 volumes: - ./backend:/app depends_on: - db + - langgraph-service networks: - ai-mesh + restart: unless-stopped + frontend: build: ./frontend ports: @@ -32,10 +38,15 @@ services: - /app/node_modules environment: - CHOKIDAR_USEPOLLING=true + depends_on: + - backend networks: - ai-mesh + volumes: postgres_data: + networks: - ai-mesh: - external: true + ai-mesh: + external: true + diff --git a/knowledge_service/gitea_scraper.py b/knowledge_service/gitea_scraper.py index dd0fda6..7733a31 100644 --- a/knowledge_service/gitea_scraper.py +++ b/knowledge_service/gitea_scraper.py @@ -61,7 +61,6 @@ class GiteaScraper: def get_readme(self, repo_name: str) -> str: """Fetch README content for a repository.""" - # Try common README filenames readme_names = ["README.md", "readme.md", "Readme.md", "README.rst"] for readme_name in readme_names: @@ -113,7 +112,7 @@ if __name__ == "__main__": repos = scraper.get_user_repos() print(f"Found {len(repos)} repositories") - for repo in repos[:3]: # Test with first 3 + for repo in repos[:3]: print(f"\nRepo: {repo.name}") readme = scraper.get_readme(repo.name) if readme: diff --git a/langgraph_service/Dockerfile b/langgraph_service/Dockerfile index eccd9d6..2c0a228 100644 --- a/langgraph_service/Dockerfile +++ b/langgraph_service/Dockerfile @@ -6,16 +6,22 @@ RUN apt-get update && apt-get install -y \ g++ \ && rm -rf /var/lib/apt/lists/* -# Create app directory +# Create directories +RUN mkdir -p /app/packages /app/code + WORKDIR /app -# Copy requirements +# Install packages to isolated directory COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --target=/app/packages -r requirements.txt # Copy code -COPY . . +COPY . /app/code/ +ENV PYTHONPATH=/app/packages +ENV PYTHONUNBUFFERED=1 + +WORKDIR /app/code EXPOSE 8090 CMD ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8090"] diff --git a/langgraph_service/docker-compose.yml b/langgraph_service/docker-compose.yml new file mode 100644 index 0000000..0869c79 --- /dev/null +++ b/langgraph_service/docker-compose.yml @@ -0,0 +1,26 @@ +version: '3.8' + +services: + langgraph-service: + build: . + image: langgraph-service:latest + container_name: langgraph-service + ports: + - "8090:8090" + volumes: + # Only mount code files, not packages + - ./main.py:/app/code/main.py:ro + - ./supervisor_agent.py:/app/code/supervisor_agent.py:ro + environment: + - PYTHONUNBUFFERED=1 + - PYTHONPATH=/app/packages + - OPENCODE_PASSWORD=${OPENCODE_PASSWORD:-sam4jo} + - KNOWLEDGE_SERVICE_URL=http://knowledge-service:8080 + networks: + - ai-mesh + restart: unless-stopped + +networks: + ai-mesh: + external: true +