Restructure: Move services from root to unified repo
Moved updated services from the /home/sam/development/ root into aboutme_chat_demo/:
- knowledge_service/ (with ChromaDB, gitea_scraper, FastAPI)
- langgraph_service/ (with LangGraph agent orchestration)
- airflow/ (with DAGs for scheduled ingestion)

All services are now in a single repo location. Modular docker-compose files are maintained per service. Removed duplicate nested directories. Updated files reflect the latest working versions.
This commit is contained in:
@@ -5,10 +5,8 @@ Runs daily to fetch new/updated repos and ingest into ChromaDB.
|
|||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from airflow import DAG
|
from airflow import DAG
|
||||||
from airflow.operators.python import PythonOperator
|
from airflow.operators.python import PythonOperator
|
||||||
from airflow.providers.http.operators.http import SimpleHttpOperator
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import json
|
|
||||||
|
|
||||||
# Add knowledge_service to path for imports
|
# Add knowledge_service to path for imports
|
||||||
sys.path.insert(0, '/opt/airflow/dags/repo')
|
sys.path.insert(0, '/opt/airflow/dags/repo')
|
||||||
@@ -115,30 +113,26 @@ with DAG(
|
|||||||
'gitea_daily_ingestion',
|
'gitea_daily_ingestion',
|
||||||
default_args=default_args,
|
default_args=default_args,
|
||||||
description='Daily ingestion of Gitea repositories into knowledge base',
|
description='Daily ingestion of Gitea repositories into knowledge base',
|
||||||
schedule_interval=timedelta(days=1), # Run daily
|
schedule_interval=timedelta(days=1),
|
||||||
start_date=datetime(2024, 1, 1),
|
start_date=datetime(2024, 1, 1),
|
||||||
catchup=False,
|
catchup=False,
|
||||||
tags=['gitea', 'ingestion', 'knowledge'],
|
tags=['gitea', 'ingestion', 'knowledge'],
|
||||||
) as dag:
|
) as dag:
|
||||||
|
|
||||||
# Task 1: Fetch repository list
|
|
||||||
fetch_repos_task = PythonOperator(
|
fetch_repos_task = PythonOperator(
|
||||||
task_id='fetch_repos',
|
task_id='fetch_repos',
|
||||||
python_callable=fetch_gitea_repos,
|
python_callable=fetch_gitea_repos,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Task 2: Fetch README content
|
|
||||||
fetch_readmes_task = PythonOperator(
|
fetch_readmes_task = PythonOperator(
|
||||||
task_id='fetch_readmes',
|
task_id='fetch_readmes',
|
||||||
python_callable=fetch_readmes,
|
python_callable=fetch_readmes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Task 3: Ingest into ChromaDB
|
|
||||||
ingest_task = PythonOperator(
|
ingest_task = PythonOperator(
|
||||||
task_id='ingest_to_chroma',
|
task_id='ingest_to_chroma',
|
||||||
python_callable=ingest_to_chroma,
|
python_callable=ingest_to_chroma,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Define task dependencies
|
|
||||||
fetch_repos_task >> fetch_readmes_task >> ingest_task
|
fetch_repos_task >> fetch_readmes_task >> ingest_task
|
||||||
|
|
||||||
|
|||||||
@@ -14,12 +14,12 @@ x-airflow-common:
|
|||||||
AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
|
AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
|
||||||
AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session'
|
AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session'
|
||||||
AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
|
AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
|
||||||
_PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
|
|
||||||
volumes:
|
volumes:
|
||||||
- ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
|
- ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
|
||||||
- ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
|
- ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
|
||||||
- ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
|
- ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
|
||||||
- ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
|
- ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
|
||||||
|
- /home/sam/development/knowledge_service:/opt/airflow/dags/repo:ro
|
||||||
user: "${AIRFLOW_UID:-50000}:0"
|
user: "${AIRFLOW_UID:-50000}:0"
|
||||||
depends_on:
|
depends_on:
|
||||||
&airflow-common-depends-on
|
&airflow-common-depends-on
|
||||||
@@ -115,23 +115,6 @@ services:
|
|||||||
networks:
|
networks:
|
||||||
- ai-mesh
|
- ai-mesh
|
||||||
|
|
||||||
airflow-triggerer:
|
|
||||||
<<: *airflow-common
|
|
||||||
command: triggerer
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
|
|
||||||
interval: 30s
|
|
||||||
timeout: 10s
|
|
||||||
retries: 5
|
|
||||||
start_period: 30s
|
|
||||||
restart: always
|
|
||||||
depends_on:
|
|
||||||
<<: *airflow-common-depends-on
|
|
||||||
airflow-init:
|
|
||||||
condition: service_completed_successfully
|
|
||||||
networks:
|
|
||||||
- ai-mesh
|
|
||||||
|
|
||||||
airflow-init:
|
airflow-init:
|
||||||
<<: *airflow-common
|
<<: *airflow-common
|
||||||
entrypoint: /bin/bash
|
entrypoint: /bin/bash
|
||||||
@@ -158,20 +141,6 @@ services:
|
|||||||
networks:
|
networks:
|
||||||
- ai-mesh
|
- ai-mesh
|
||||||
|
|
||||||
airflow-cli:
|
|
||||||
<<: *airflow-common
|
|
||||||
profiles:
|
|
||||||
- debug
|
|
||||||
environment:
|
|
||||||
<<: *airflow-common-env
|
|
||||||
CONNECTION_CHECK_MAX_COUNT: "0"
|
|
||||||
command:
|
|
||||||
- bash
|
|
||||||
- -c
|
|
||||||
- airflow
|
|
||||||
networks:
|
|
||||||
- ai-mesh
|
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
postgres-db-volume:
|
postgres-db-volume:
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import httpx
|
|||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
|
import os
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[logging.StreamHandler(sys.stdout)])
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[logging.StreamHandler(sys.stdout)])
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -15,44 +16,53 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
|
|||||||
class MessageRequest(BaseModel):
|
class MessageRequest(BaseModel):
|
||||||
message: str
|
message: str
|
||||||
|
|
||||||
BRAIN_URL = "http://opencode-brain:5000"
|
LANGGRAPH_URL = os.getenv("LANGGRAPH_URL", "http://langgraph-service:8090")
|
||||||
KNOWLEDGE_URL = "http://knowledge-service:8080/query"
|
|
||||||
AUTH = httpx.BasicAuth("opencode", "sam4jo")
|
|
||||||
|
|
||||||
@app.post("/chat")
|
@app.post("/chat")
|
||||||
async def chat(request: MessageRequest):
|
async def chat(request: MessageRequest):
|
||||||
user_msg = request.message.lower()
|
"""
|
||||||
timeout_long = httpx.Timeout(180.0, connect=10.0)
|
Routes all queries through LangGraph Supervisor.
|
||||||
timeout_short = httpx.Timeout(5.0, connect=2.0)
|
No hardcoded keywords - LangGraph intelligently routes to:
|
||||||
|
- Librarian: For knowledge base queries (RAG)
|
||||||
|
- Opencode: For coding tasks
|
||||||
|
- Brain: For general LLM queries
|
||||||
|
"""
|
||||||
|
logger.info(f"Gateway: Routing query to LangGraph: {request.message}")
|
||||||
|
|
||||||
context = ""
|
try:
|
||||||
# Check for keywords to trigger Librarian (DB) lookup
|
async with httpx.AsyncClient(timeout=httpx.Timeout(60.0, connect=10.0)) as client:
|
||||||
if any(kw in user_msg for kw in ["sam", "hobby", "music", "guitar", "skiing", "experience"]):
|
response = await client.post(
|
||||||
logger.info("Gateway: Consulting Librarian (DB)...")
|
f"{LANGGRAPH_URL}/query",
|
||||||
async with httpx.AsyncClient(timeout=timeout_short) as client:
|
json={"query": request.message}
|
||||||
try:
|
)
|
||||||
k_res = await client.post(KNOWLEDGE_URL, json={"question": request.message})
|
|
||||||
if k_res.status_code == 200:
|
if response.status_code == 200:
|
||||||
context = k_res.json().get("context", "")
|
result = response.json()
|
||||||
except Exception as e:
|
agent_used = result.get("agent_used", "unknown")
|
||||||
logger.warning(f"Gateway: Librarian offline/slow: {str(e)}")
|
logger.info(f"Gateway: Response from {agent_used} agent")
|
||||||
|
return {"response": result["response"]}
|
||||||
|
else:
|
||||||
|
logger.error(f"Gateway: LangGraph error {response.status_code}")
|
||||||
|
return {"response": "Error: Orchestration service unavailable"}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Gateway: Error routing through LangGraph: {traceback.format_exc()}")
|
||||||
|
return {"response": "Error: Unable to process your request at this time."}
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
async def health():
|
||||||
|
return {"status": "healthy", "service": "chat-gateway"}
|
||||||
|
|
||||||
|
@app.get("/agents")
|
||||||
|
async def list_agents():
|
||||||
|
"""List available agents from LangGraph."""
|
||||||
|
try:
|
||||||
|
async with httpx.AsyncClient(timeout=httpx.Timeout(10.0)) as client:
|
||||||
|
response = await client.get(f"{LANGGRAPH_URL}/agents")
|
||||||
|
if response.status_code == 200:
|
||||||
|
return response.json()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error fetching agents: {e}")
|
||||||
|
|
||||||
|
return {"agents": [], "error": "Could not retrieve agent list"}
|
||||||
|
|
||||||
# Forward to Brain (LLM)
|
|
||||||
async with httpx.AsyncClient(auth=AUTH, timeout=timeout_long) as brain_client:
|
|
||||||
try:
|
|
||||||
session_res = await brain_client.post(f"{BRAIN_URL}/session", json={"title": "Demo"})
|
|
||||||
session_id = session_res.json()["id"]
|
|
||||||
final_prompt = f"CONTEXT:\n{context}\n\nUSER: {request.message}" if context else request.message
|
|
||||||
response = await brain_client.post(f"{BRAIN_URL}/session/{session_id}/message", json={"parts": [{"type": "text", "text": final_prompt}]})
|
|
||||||
|
|
||||||
# FIX: Iterate through parts array to find text response
|
|
||||||
data = response.json()
|
|
||||||
if "parts" in data:
|
|
||||||
for part in data["parts"]:
|
|
||||||
if part.get("type") == "text" and "text" in part:
|
|
||||||
return {"response": part["text"]}
|
|
||||||
|
|
||||||
return {"response": "AI responded but no text found in expected format."}
|
|
||||||
except Exception:
|
|
||||||
logger.error(f"Gateway: Brain failure: {traceback.format_exc()}")
|
|
||||||
return {"response": "Error: The Brain is taking too long or is disconnected."}
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ services:
|
|||||||
image: postgres:15-alpine
|
image: postgres:15-alpine
|
||||||
environment:
|
environment:
|
||||||
POSTGRES_USER: sam
|
POSTGRES_USER: sam
|
||||||
POSTGRES_PASSWORD: sam4jo
|
POSTGRES_PASSWORD: sam4jo
|
||||||
POSTGRES_DB: chat_demo
|
POSTGRES_DB: chat_demo
|
||||||
ports:
|
ports:
|
||||||
- "5432:5432"
|
- "5432:5432"
|
||||||
@@ -11,18 +11,24 @@ services:
|
|||||||
- postgres_data:/var/lib/postgresql/data
|
- postgres_data:/var/lib/postgresql/data
|
||||||
networks:
|
networks:
|
||||||
- ai-mesh
|
- ai-mesh
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
backend:
|
backend:
|
||||||
build: ./backend
|
build: ./backend
|
||||||
ports:
|
ports:
|
||||||
- "8000:8000"
|
- "8000:8000"
|
||||||
environment:
|
environment:
|
||||||
DATABASE_URL: postgresql://sam:sam4jo@db:5432/chat_demo
|
DATABASE_URL: postgresql://sam:sam4jo@db:5432/chat_demo
|
||||||
|
LANGGRAPH_URL: http://langgraph-service:8090
|
||||||
volumes:
|
volumes:
|
||||||
- ./backend:/app
|
- ./backend:/app
|
||||||
depends_on:
|
depends_on:
|
||||||
- db
|
- db
|
||||||
|
- langgraph-service
|
||||||
networks:
|
networks:
|
||||||
- ai-mesh
|
- ai-mesh
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
frontend:
|
frontend:
|
||||||
build: ./frontend
|
build: ./frontend
|
||||||
ports:
|
ports:
|
||||||
@@ -32,10 +38,15 @@ services:
|
|||||||
- /app/node_modules
|
- /app/node_modules
|
||||||
environment:
|
environment:
|
||||||
- CHOKIDAR_USEPOLLING=true
|
- CHOKIDAR_USEPOLLING=true
|
||||||
|
depends_on:
|
||||||
|
- backend
|
||||||
networks:
|
networks:
|
||||||
- ai-mesh
|
- ai-mesh
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
postgres_data:
|
postgres_data:
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
ai-mesh:
|
ai-mesh:
|
||||||
external: true
|
external: true
|
||||||
|
|
||||||
|
|||||||
@@ -61,7 +61,6 @@ class GiteaScraper:
|
|||||||
|
|
||||||
def get_readme(self, repo_name: str) -> str:
|
def get_readme(self, repo_name: str) -> str:
|
||||||
"""Fetch README content for a repository."""
|
"""Fetch README content for a repository."""
|
||||||
# Try common README filenames
|
|
||||||
readme_names = ["README.md", "readme.md", "Readme.md", "README.rst"]
|
readme_names = ["README.md", "readme.md", "Readme.md", "README.rst"]
|
||||||
|
|
||||||
for readme_name in readme_names:
|
for readme_name in readme_names:
|
||||||
@@ -113,7 +112,7 @@ if __name__ == "__main__":
|
|||||||
repos = scraper.get_user_repos()
|
repos = scraper.get_user_repos()
|
||||||
print(f"Found {len(repos)} repositories")
|
print(f"Found {len(repos)} repositories")
|
||||||
|
|
||||||
for repo in repos[:3]: # Test with first 3
|
for repo in repos[:3]:
|
||||||
print(f"\nRepo: {repo.name}")
|
print(f"\nRepo: {repo.name}")
|
||||||
readme = scraper.get_readme(repo.name)
|
readme = scraper.get_readme(repo.name)
|
||||||
if readme:
|
if readme:
|
||||||
|
|||||||
@@ -6,16 +6,22 @@ RUN apt-get update && apt-get install -y \
|
|||||||
g++ \
|
g++ \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Create app directory
|
# Create directories
|
||||||
|
RUN mkdir -p /app/packages /app/code
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Copy requirements
|
# Install packages to isolated directory
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --target=/app/packages -r requirements.txt
|
||||||
|
|
||||||
# Copy code
|
# Copy code
|
||||||
COPY . .
|
COPY . /app/code/
|
||||||
|
|
||||||
|
ENV PYTHONPATH=/app/packages
|
||||||
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
WORKDIR /app/code
|
||||||
EXPOSE 8090
|
EXPOSE 8090
|
||||||
|
|
||||||
CMD ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8090"]
|
CMD ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8090"]
|
||||||
|
|||||||
26
langgraph_service/docker-compose.yml
Normal file
26
langgraph_service/docker-compose.yml
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
langgraph-service:
|
||||||
|
build: .
|
||||||
|
image: langgraph-service:latest
|
||||||
|
container_name: langgraph-service
|
||||||
|
ports:
|
||||||
|
- "8090:8090"
|
||||||
|
volumes:
|
||||||
|
# Only mount code files, not packages
|
||||||
|
- ./main.py:/app/code/main.py:ro
|
||||||
|
- ./supervisor_agent.py:/app/code/supervisor_agent.py:ro
|
||||||
|
environment:
|
||||||
|
- PYTHONUNBUFFERED=1
|
||||||
|
- PYTHONPATH=/app/packages
|
||||||
|
- OPENCODE_PASSWORD=${OPENCODE_PASSWORD:-sam4jo}
|
||||||
|
- KNOWLEDGE_SERVICE_URL=http://knowledge-service:8080
|
||||||
|
networks:
|
||||||
|
- ai-mesh
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
networks:
|
||||||
|
ai-mesh:
|
||||||
|
external: true
|
||||||
|
|
||||||
Reference in New Issue
Block a user