Restructure: Move services from root to unified repo

Moved updated services from /home/sam/development/ root into aboutme_chat_demo/:
- knowledge_service/ (with ChromaDB, gitea_scraper, FastAPI)
- langgraph_service/ (with LangGraph agent orchestration)
- airflow/ (with DAGs for scheduled ingestion)

All services now live in a single repo location.
Modular docker-compose files are maintained per service.
Removed duplicate nested directories.
Updated files reflect the latest working versions.
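
The resulting layout, roughly (a sketch assembled from the paths touched in this commit; files inside each service directory may differ):

    aboutme_chat_demo/
    ├── docker-compose.yml
    ├── backend/
    ├── frontend/
    ├── knowledge_service/
    │   └── gitea_scraper.py
    ├── langgraph_service/
    │   ├── Dockerfile
    │   ├── docker-compose.yml
    │   ├── main.py
    │   └── supervisor_agent.py
    └── airflow/
        ├── docker-compose.yml
        └── dags/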
commit 76f7367e2f
parent 628ba96998
2026-02-28 14:51:37 +11:00
7 changed files with 99 additions and 84 deletions

View File

@@ -5,10 +5,8 @@ Runs daily to fetch new/updated repos and ingest into ChromaDB.
 from datetime import datetime, timedelta
 from airflow import DAG
 from airflow.operators.python import PythonOperator
-from airflow.providers.http.operators.http import SimpleHttpOperator
 import os
 import sys
-import json

 # Add knowledge_service to path for imports
 sys.path.insert(0, '/opt/airflow/dags/repo')
@@ -115,30 +113,26 @@ with DAG(
     'gitea_daily_ingestion',
     default_args=default_args,
     description='Daily ingestion of Gitea repositories into knowledge base',
-    schedule_interval=timedelta(days=1),  # Run daily
+    schedule_interval=timedelta(days=1),
     start_date=datetime(2024, 1, 1),
     catchup=False,
     tags=['gitea', 'ingestion', 'knowledge'],
 ) as dag:

-    # Task 1: Fetch repository list
     fetch_repos_task = PythonOperator(
         task_id='fetch_repos',
         python_callable=fetch_gitea_repos,
     )

-    # Task 2: Fetch README content
     fetch_readmes_task = PythonOperator(
         task_id='fetch_readmes',
         python_callable=fetch_readmes,
     )

-    # Task 3: Ingest into ChromaDB
     ingest_task = PythonOperator(
         task_id='ingest_to_chroma',
         python_callable=ingest_to_chroma,
     )

-    # Define task dependencies
     fetch_repos_task >> fetch_readmes_task >> ingest_task
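
The three operators run strictly in sequence via the `>>` chain. The callables themselves are defined earlier in the DAG file and are not part of this hunk; a plausible way they hand data to each other is Airflow's XCom, sketched here with hypothetical payloads (the real fetch_gitea_repos and fetch_readmes may pass data differently):

    def fetch_gitea_repos(**context):
        # Hypothetical payload; the real callable scrapes Gitea via gitea_scraper.
        repos = [{"name": "example-repo"}]
        context["ti"].xcom_push(key="repos", value=repos)

    def fetch_readmes(**context):
        # Pull whatever the upstream task pushed, keyed by its task_id.
        repos = context["ti"].xcom_pull(task_ids="fetch_repos", key="repos")
        return {r["name"]: "README text here" for r in repos}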

View File

@@ -14,12 +14,12 @@ x-airflow-common:
     AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
     AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session'
     AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
-    _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
   volumes:
     - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
     - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
     - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
     - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
+    - /home/sam/development/knowledge_service:/opt/airflow/dags/repo:ro
   user: "${AIRFLOW_UID:-50000}:0"
   depends_on:
     &airflow-common-depends-on
@@ -115,23 +115,6 @@ services:
     networks:
       - ai-mesh

-  airflow-triggerer:
-    <<: *airflow-common
-    command: triggerer
-    healthcheck:
-      test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
-      interval: 30s
-      timeout: 10s
-      retries: 5
-      start_period: 30s
-    restart: always
-    depends_on:
-      <<: *airflow-common-depends-on
-      airflow-init:
-        condition: service_completed_successfully
-    networks:
-      - ai-mesh
-
   airflow-init:
     <<: *airflow-common
     entrypoint: /bin/bash
@@ -158,20 +141,6 @@ services:
     networks:
       - ai-mesh

-  airflow-cli:
-    <<: *airflow-common
-    profiles:
-      - debug
-    environment:
-      <<: *airflow-common-env
-      CONNECTION_CHECK_MAX_COUNT: "0"
-    command:
-      - bash
-      - -c
-      - airflow
-    networks:
-      - ai-mesh
-
 volumes:
   postgres-db-volume:
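
The new read-only bind mount is what backs the DAG's `sys.path.insert(0, '/opt/airflow/dags/repo')`: inside each Airflow container the host's knowledge_service directory appears at /opt/airflow/dags/repo. A minimal sketch of that import wiring, assuming gitea_scraper.py sits at the top level of knowledge_service:

    import sys

    # /opt/airflow/dags/repo is the container-side path of the bind mount.
    sys.path.insert(0, "/opt/airflow/dags/repo")
    from gitea_scraper import GiteaScraper  # resolves via the mount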

View File

@@ -5,6 +5,7 @@ import httpx
 import logging
 import sys
 import traceback
+import os

 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[logging.StreamHandler(sys.stdout)])
 logger = logging.getLogger(__name__)
@@ -15,44 +16,53 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
 class MessageRequest(BaseModel):
     message: str

-BRAIN_URL = "http://opencode-brain:5000"
-KNOWLEDGE_URL = "http://knowledge-service:8080/query"
-AUTH = httpx.BasicAuth("opencode", "sam4jo")
+LANGGRAPH_URL = os.getenv("LANGGRAPH_URL", "http://langgraph-service:8090")

 @app.post("/chat")
 async def chat(request: MessageRequest):
-    user_msg = request.message.lower()
-    timeout_long = httpx.Timeout(180.0, connect=10.0)
-    timeout_short = httpx.Timeout(5.0, connect=2.0)
-
-    context = ""
-    # Check for keywords to trigger Librarian (DB) lookup
-    if any(kw in user_msg for kw in ["sam", "hobby", "music", "guitar", "skiing", "experience"]):
-        logger.info("Gateway: Consulting Librarian (DB)...")
-        async with httpx.AsyncClient(timeout=timeout_short) as client:
-            try:
-                k_res = await client.post(KNOWLEDGE_URL, json={"question": request.message})
-                if k_res.status_code == 200:
-                    context = k_res.json().get("context", "")
-            except Exception as e:
-                logger.warning(f"Gateway: Librarian offline/slow: {str(e)}")
-
-    # Forward to Brain (LLM)
-    async with httpx.AsyncClient(auth=AUTH, timeout=timeout_long) as brain_client:
-        try:
-            session_res = await brain_client.post(f"{BRAIN_URL}/session", json={"title": "Demo"})
-            session_id = session_res.json()["id"]
-            final_prompt = f"CONTEXT:\n{context}\n\nUSER: {request.message}" if context else request.message
-            response = await brain_client.post(f"{BRAIN_URL}/session/{session_id}/message", json={"parts": [{"type": "text", "text": final_prompt}]})
-
-            # FIX: Iterate through parts array to find text response
-            data = response.json()
-            if "parts" in data:
-                for part in data["parts"]:
-                    if part.get("type") == "text" and "text" in part:
-                        return {"response": part["text"]}
-
-            return {"response": "AI responded but no text found in expected format."}
-        except Exception:
-            logger.error(f"Gateway: Brain failure: {traceback.format_exc()}")
-            return {"response": "Error: The Brain is taking too long or is disconnected."}
+    """
+    Routes all queries through LangGraph Supervisor.
+    No hardcoded keywords - LangGraph intelligently routes to:
+    - Librarian: For knowledge base queries (RAG)
+    - Opencode: For coding tasks
+    - Brain: For general LLM queries
+    """
+    logger.info(f"Gateway: Routing query to LangGraph: {request.message}")
+    try:
+        async with httpx.AsyncClient(timeout=httpx.Timeout(60.0, connect=10.0)) as client:
+            response = await client.post(
+                f"{LANGGRAPH_URL}/query",
+                json={"query": request.message}
+            )
+
+            if response.status_code == 200:
+                result = response.json()
+                agent_used = result.get("agent_used", "unknown")
+                logger.info(f"Gateway: Response from {agent_used} agent")
+                return {"response": result["response"]}
+            else:
+                logger.error(f"Gateway: LangGraph error {response.status_code}")
+                return {"response": "Error: Orchestration service unavailable"}
+    except Exception as e:
+        logger.error(f"Gateway: Error routing through LangGraph: {traceback.format_exc()}")
+        return {"response": "Error: Unable to process your request at this time."}
+
+@app.get("/health")
+async def health():
+    return {"status": "healthy", "service": "chat-gateway"}
+
+@app.get("/agents")
+async def list_agents():
+    """List available agents from LangGraph."""
+    try:
+        async with httpx.AsyncClient(timeout=httpx.Timeout(10.0)) as client:
+            response = await client.get(f"{LANGGRAPH_URL}/agents")
+            if response.status_code == 200:
+                return response.json()
+    except Exception as e:
+        logger.error(f"Error fetching agents: {e}")
+    return {"agents": [], "error": "Could not retrieve agent list"}

View File

@@ -11,18 +11,24 @@ services:
       - postgres_data:/var/lib/postgresql/data
     networks:
       - ai-mesh
+    restart: unless-stopped

   backend:
     build: ./backend
     ports:
       - "8000:8000"
     environment:
       DATABASE_URL: postgresql://sam:sam4jo@db:5432/chat_demo
+      LANGGRAPH_URL: http://langgraph-service:8090
     volumes:
       - ./backend:/app
     depends_on:
       - db
+      - langgraph-service
     networks:
       - ai-mesh
+    restart: unless-stopped

   frontend:
     build: ./frontend
     ports:
@@ -32,10 +38,15 @@ services:
       - /app/node_modules
     environment:
       - CHOKIDAR_USEPOLLING=true
+    depends_on:
+      - backend
     networks:
       - ai-mesh

 volumes:
   postgres_data:

 networks:
   ai-mesh:
     external: true

View File

@@ -61,7 +61,6 @@ class GiteaScraper:
     def get_readme(self, repo_name: str) -> str:
         """Fetch README content for a repository."""
-        # Try common README filenames
         readme_names = ["README.md", "readme.md", "Readme.md", "README.rst"]

         for readme_name in readme_names:
@@ -113,7 +112,7 @@ if __name__ == "__main__":
     repos = scraper.get_user_repos()
     print(f"Found {len(repos)} repositories")

-    for repo in repos[:3]:  # Test with first 3
+    for repo in repos[:3]:
         print(f"\nRepo: {repo.name}")
         readme = scraper.get_readme(repo.name)
         if readme:
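
The body of the fallback loop sits outside this hunk; presumably each candidate filename is tried against Gitea's raw-content endpoint until one returns 200. A hedged sketch of that pattern (the URL shape follows Gitea's v1 API; the parameter names are assumptions, not the scraper's real attributes):

    import httpx

    def get_readme_sketch(base_url: str, owner: str, repo: str, token: str) -> str:
        readme_names = ["README.md", "readme.md", "Readme.md", "README.rst"]
        for name in readme_names:
            resp = httpx.get(
                f"{base_url}/api/v1/repos/{owner}/{repo}/raw/{name}",
                headers={"Authorization": f"token {token}"},
            )
            if resp.status_code == 200:
                return resp.text  # first existing README wins
        return ""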

View File

@@ -6,16 +6,22 @@ RUN apt-get update && apt-get install -y \
     g++ \
     && rm -rf /var/lib/apt/lists/*

-# Create app directory
+# Create directories
+RUN mkdir -p /app/packages /app/code
 WORKDIR /app

-# Copy requirements
+# Install packages to isolated directory
 COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --target=/app/packages -r requirements.txt

 # Copy code
-COPY . .
+COPY . /app/code/
+
+ENV PYTHONPATH=/app/packages
+ENV PYTHONUNBUFFERED=1
+WORKDIR /app/code

 EXPOSE 8090
 CMD ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8090"]

View File

@@ -0,0 +1,26 @@
+version: '3.8'
+
+services:
+  langgraph-service:
+    build: .
+    image: langgraph-service:latest
+    container_name: langgraph-service
+    ports:
+      - "8090:8090"
+    volumes:
+      # Only mount code files, not packages
+      - ./main.py:/app/code/main.py:ro
+      - ./supervisor_agent.py:/app/code/supervisor_agent.py:ro
+    environment:
+      - PYTHONUNBUFFERED=1
+      - PYTHONPATH=/app/packages
+      - OPENCODE_PASSWORD=${OPENCODE_PASSWORD:-sam4jo}
+      - KNOWLEDGE_SERVICE_URL=http://knowledge-service:8080
+    networks:
+      - ai-mesh
+    restart: unless-stopped
+
+networks:
+  ai-mesh:
+    external: true
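
Both compose files attach to the external ai-mesh network, so it must exist before `docker compose up` can start anything. One way to ensure that from Python with the docker SDK (the SDK calls are standard docker-py; the bridge driver choice is an assumption):

    import docker

    client = docker.from_env()
    # Create the shared network the services attach to, if it is missing.
    if not client.networks.list(names=["ai-mesh"]):
        client.networks.create("ai-mesh", driver="bridge")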