Restructure: Move services from root to unified repo
Moved the updated services from the /home/sam/development/ root into aboutme_chat_demo/:
- knowledge_service/ (ChromaDB, gitea_scraper, FastAPI)
- langgraph_service/ (LangGraph agent orchestration)
- airflow/ (DAGs for scheduled ingestion)

All services now live in a single repository. Each service keeps its own modular docker-compose file. Duplicate nested directories were removed, and the updated files reflect the latest working versions.
This commit is contained in:
@@ -5,10 +5,8 @@ Runs daily to fetch new/updated repos and ingest into ChromaDB.
|
||||
from datetime import datetime, timedelta
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.providers.http.operators.http import SimpleHttpOperator
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
# Add knowledge_service to path for imports
|
||||
sys.path.insert(0, '/opt/airflow/dags/repo')
|
||||
@@ -115,30 +113,26 @@ with DAG(
|
||||
'gitea_daily_ingestion',
|
||||
default_args=default_args,
|
||||
description='Daily ingestion of Gitea repositories into knowledge base',
|
||||
schedule_interval=timedelta(days=1), # Run daily
|
||||
schedule_interval=timedelta(days=1),
|
||||
start_date=datetime(2024, 1, 1),
|
||||
catchup=False,
|
||||
tags=['gitea', 'ingestion', 'knowledge'],
|
||||
) as dag:
|
||||
|
||||
# Task 1: Fetch repository list
|
||||
fetch_repos_task = PythonOperator(
|
||||
task_id='fetch_repos',
|
||||
python_callable=fetch_gitea_repos,
|
||||
)
|
||||
|
||||
# Task 2: Fetch README content
|
||||
fetch_readmes_task = PythonOperator(
|
||||
task_id='fetch_readmes',
|
||||
python_callable=fetch_readmes,
|
||||
)
|
||||
|
||||
# Task 3: Ingest into ChromaDB
|
||||
ingest_task = PythonOperator(
|
||||
task_id='ingest_to_chroma',
|
||||
python_callable=ingest_to_chroma,
|
||||
)
|
||||
|
||||
# Define task dependencies
|
||||
fetch_repos_task >> fetch_readmes_task >> ingest_task
|
||||
|
||||
|
||||
@@ -14,12 +14,12 @@ x-airflow-common:
|
||||
AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
|
||||
AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session'
|
||||
AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
|
||||
_PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
|
||||
volumes:
|
||||
- ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
|
||||
- ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
|
||||
- ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
|
||||
- ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
|
||||
- /home/sam/development/knowledge_service:/opt/airflow/dags/repo:ro
|
||||
user: "${AIRFLOW_UID:-50000}:0"
|
||||
depends_on:
|
||||
&airflow-common-depends-on
|
||||
@@ -115,23 +115,6 @@ services:
|
||||
networks:
|
||||
- ai-mesh
|
||||
|
||||
airflow-triggerer:
|
||||
<<: *airflow-common
|
||||
command: triggerer
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
start_period: 30s
|
||||
restart: always
|
||||
depends_on:
|
||||
<<: *airflow-common-depends-on
|
||||
airflow-init:
|
||||
condition: service_completed_successfully
|
||||
networks:
|
||||
- ai-mesh
|
||||
|
||||
airflow-init:
|
||||
<<: *airflow-common
|
||||
entrypoint: /bin/bash
|
||||
@@ -158,20 +141,6 @@ services:
|
||||
networks:
|
||||
- ai-mesh
|
||||
|
||||
airflow-cli:
|
||||
<<: *airflow-common
|
||||
profiles:
|
||||
- debug
|
||||
environment:
|
||||
<<: *airflow-common-env
|
||||
CONNECTION_CHECK_MAX_COUNT: "0"
|
||||
command:
|
||||
- bash
|
||||
- -c
|
||||
- airflow
|
||||
networks:
|
||||
- ai-mesh
|
||||
|
||||
volumes:
|
||||
postgres-db-volume:
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import httpx
|
||||
import logging
|
||||
import sys
|
||||
import traceback
|
||||
import os
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[logging.StreamHandler(sys.stdout)])
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -15,44 +16,53 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
|
||||
class MessageRequest(BaseModel):
|
||||
message: str
|
||||
|
||||
BRAIN_URL = "http://opencode-brain:5000"
|
||||
KNOWLEDGE_URL = "http://knowledge-service:8080/query"
|
||||
AUTH = httpx.BasicAuth("opencode", "sam4jo")
|
||||
LANGGRAPH_URL = os.getenv("LANGGRAPH_URL", "http://langgraph-service:8090")
|
||||
|
||||
@app.post("/chat")
|
||||
async def chat(request: MessageRequest):
|
||||
user_msg = request.message.lower()
|
||||
timeout_long = httpx.Timeout(180.0, connect=10.0)
|
||||
timeout_short = httpx.Timeout(5.0, connect=2.0)
|
||||
"""
|
||||
Routes all queries through LangGraph Supervisor.
|
||||
No hardcoded keywords - LangGraph intelligently routes to:
|
||||
- Librarian: For knowledge base queries (RAG)
|
||||
- Opencode: For coding tasks
|
||||
- Brain: For general LLM queries
|
||||
"""
|
||||
logger.info(f"Gateway: Routing query to LangGraph: {request.message}")
|
||||
|
||||
context = ""
|
||||
# Check for keywords to trigger Librarian (DB) lookup
|
||||
if any(kw in user_msg for kw in ["sam", "hobby", "music", "guitar", "skiing", "experience"]):
|
||||
logger.info("Gateway: Consulting Librarian (DB)...")
|
||||
async with httpx.AsyncClient(timeout=timeout_short) as client:
|
||||
try:
|
||||
k_res = await client.post(KNOWLEDGE_URL, json={"question": request.message})
|
||||
if k_res.status_code == 200:
|
||||
context = k_res.json().get("context", "")
|
||||
except Exception as e:
|
||||
logger.warning(f"Gateway: Librarian offline/slow: {str(e)}")
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=httpx.Timeout(60.0, connect=10.0)) as client:
|
||||
response = await client.post(
|
||||
f"{LANGGRAPH_URL}/query",
|
||||
json={"query": request.message}
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
result = response.json()
|
||||
agent_used = result.get("agent_used", "unknown")
|
||||
logger.info(f"Gateway: Response from {agent_used} agent")
|
||||
return {"response": result["response"]}
|
||||
else:
|
||||
logger.error(f"Gateway: LangGraph error {response.status_code}")
|
||||
return {"response": "Error: Orchestration service unavailable"}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Gateway: Error routing through LangGraph: {traceback.format_exc()}")
|
||||
return {"response": "Error: Unable to process your request at this time."}
|
||||
|
||||
@app.get("/health")
async def health():
    """Report a static health payload for the chat gateway."""
    status_payload = {"status": "healthy", "service": "chat-gateway"}
    return status_payload
|
||||
|
||||
@app.get("/agents")
async def list_agents():
    """List available agents from LangGraph.

    Sends GET ``{LANGGRAPH_URL}/agents`` with a 10-second timeout.

    Returns:
        The upstream JSON body when LangGraph answers with HTTP 200;
        otherwise ``{"agents": [], "error": "Could not retrieve agent list"}``.
    """
    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(10.0)) as client:
            response = await client.get(f"{LANGGRAPH_URL}/agents")
            if response.status_code == 200:
                return response.json()
            # Log non-200 replies so an upstream failure is visible in the
            # gateway logs instead of silently falling through to the fallback.
            logger.error(
                f"Error fetching agents: LangGraph returned {response.status_code}"
            )
    except Exception as e:
        # Best-effort endpoint: any transport or decode failure degrades to
        # the fallback payload below rather than surfacing a 500.
        logger.error(f"Error fetching agents: {e}")

    return {"agents": [], "error": "Could not retrieve agent list"}
|
||||
|
||||
# Forward to Brain (LLM)
|
||||
async with httpx.AsyncClient(auth=AUTH, timeout=timeout_long) as brain_client:
|
||||
try:
|
||||
session_res = await brain_client.post(f"{BRAIN_URL}/session", json={"title": "Demo"})
|
||||
session_id = session_res.json()["id"]
|
||||
final_prompt = f"CONTEXT:\n{context}\n\nUSER: {request.message}" if context else request.message
|
||||
response = await brain_client.post(f"{BRAIN_URL}/session/{session_id}/message", json={"parts": [{"type": "text", "text": final_prompt}]})
|
||||
|
||||
# FIX: Iterate through parts array to find text response
|
||||
data = response.json()
|
||||
if "parts" in data:
|
||||
for part in data["parts"]:
|
||||
if part.get("type") == "text" and "text" in part:
|
||||
return {"response": part["text"]}
|
||||
|
||||
return {"response": "AI responded but no text found in expected format."}
|
||||
except Exception:
|
||||
logger.error(f"Gateway: Brain failure: {traceback.format_exc()}")
|
||||
return {"response": "Error: The Brain is taking too long or is disconnected."}
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
image: postgres:15-alpine
|
||||
environment:
|
||||
POSTGRES_USER: sam
|
||||
POSTGRES_PASSWORD: sam4jo
|
||||
POSTGRES_PASSWORD: sam4jo
|
||||
POSTGRES_DB: chat_demo
|
||||
ports:
|
||||
- "5432:5432"
|
||||
@@ -11,18 +11,24 @@ services:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
networks:
|
||||
- ai-mesh
|
||||
restart: unless-stopped
|
||||
|
||||
backend:
|
||||
build: ./backend
|
||||
ports:
|
||||
- "8000:8000"
|
||||
environment:
|
||||
DATABASE_URL: postgresql://sam:sam4jo@db:5432/chat_demo
|
||||
LANGGRAPH_URL: http://langgraph-service:8090
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
depends_on:
|
||||
- db
|
||||
- langgraph-service
|
||||
networks:
|
||||
- ai-mesh
|
||||
restart: unless-stopped
|
||||
|
||||
frontend:
|
||||
build: ./frontend
|
||||
ports:
|
||||
@@ -32,10 +38,15 @@ services:
|
||||
- /app/node_modules
|
||||
environment:
|
||||
- CHOKIDAR_USEPOLLING=true
|
||||
depends_on:
|
||||
- backend
|
||||
networks:
|
||||
- ai-mesh
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
|
||||
networks:
|
||||
ai-mesh:
|
||||
external: true
|
||||
ai-mesh:
|
||||
external: true
|
||||
|
||||
|
||||
@@ -61,7 +61,6 @@ class GiteaScraper:
|
||||
|
||||
def get_readme(self, repo_name: str) -> str:
|
||||
"""Fetch README content for a repository."""
|
||||
# Try common README filenames
|
||||
readme_names = ["README.md", "readme.md", "Readme.md", "README.rst"]
|
||||
|
||||
for readme_name in readme_names:
|
||||
@@ -113,7 +112,7 @@ if __name__ == "__main__":
|
||||
repos = scraper.get_user_repos()
|
||||
print(f"Found {len(repos)} repositories")
|
||||
|
||||
for repo in repos[:3]: # Test with first 3
|
||||
for repo in repos[:3]:
|
||||
print(f"\nRepo: {repo.name}")
|
||||
readme = scraper.get_readme(repo.name)
|
||||
if readme:
|
||||
|
||||
@@ -6,16 +6,22 @@ RUN apt-get update && apt-get install -y \
|
||||
g++ \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Create app directory
|
||||
# Create directories
|
||||
RUN mkdir -p /app/packages /app/code
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy requirements
|
||||
# Install packages to isolated directory
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
RUN pip install --target=/app/packages -r requirements.txt
|
||||
|
||||
# Copy code
|
||||
COPY . .
|
||||
COPY . /app/code/
|
||||
|
||||
ENV PYTHONPATH=/app/packages
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
WORKDIR /app/code
|
||||
EXPOSE 8090
|
||||
|
||||
CMD ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8090"]
|
||||
|
||||
langgraph_service/docker-compose.yml
Normal file (new, 26 lines)
@@ -0,0 +1,26 @@
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
langgraph-service:
|
||||
build: .
|
||||
image: langgraph-service:latest
|
||||
container_name: langgraph-service
|
||||
ports:
|
||||
- "8090:8090"
|
||||
volumes:
|
||||
# Only mount code files, not packages
|
||||
- ./main.py:/app/code/main.py:ro
|
||||
- ./supervisor_agent.py:/app/code/supervisor_agent.py:ro
|
||||
environment:
|
||||
- PYTHONUNBUFFERED=1
|
||||
- PYTHONPATH=/app/packages
|
||||
- OPENCODE_PASSWORD=${OPENCODE_PASSWORD:-sam4jo}
|
||||
- KNOWLEDGE_SERVICE_URL=http://knowledge-service:8080
|
||||
networks:
|
||||
- ai-mesh
|
||||
restart: unless-stopped
|
||||
|
||||
networks:
|
||||
ai-mesh:
|
||||
external: true
|
||||
|
||||
Reference in New Issue
Block a user