Restructure: Move services from root to unified repo

Moved updated services from /home/sam/development/ root into aboutme_chat_demo/:
- knowledge_service/ (with ChromaDB, gitea_scraper, FastAPI)
- langgraph_service/ (with LangGraph agent orchestration)
- airflow/ (with DAGs for scheduled ingestion)

All services are now located in a single repository.
Modular docker-compose files are maintained per service.
Removed duplicate nested directories.
Updated files reflect latest working versions.
This commit is contained in:
2026-02-28 14:51:37 +11:00
parent 628ba96998
commit 76f7367e2f
7 changed files with 99 additions and 84 deletions

View File

@@ -5,10 +5,8 @@ Runs daily to fetch new/updated repos and ingest into ChromaDB.
from datetime import datetime, timedelta
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.http.operators.http import SimpleHttpOperator
import os
import sys
import json
# Add knowledge_service to path for imports
sys.path.insert(0, '/opt/airflow/dags/repo')
@@ -115,30 +113,26 @@ with DAG(
'gitea_daily_ingestion',
default_args=default_args,
description='Daily ingestion of Gitea repositories into knowledge base',
schedule_interval=timedelta(days=1), # Run daily
schedule_interval=timedelta(days=1),
start_date=datetime(2024, 1, 1),
catchup=False,
tags=['gitea', 'ingestion', 'knowledge'],
) as dag:
# Task 1: Fetch repository list
fetch_repos_task = PythonOperator(
task_id='fetch_repos',
python_callable=fetch_gitea_repos,
)
# Task 2: Fetch README content
fetch_readmes_task = PythonOperator(
task_id='fetch_readmes',
python_callable=fetch_readmes,
)
# Task 3: Ingest into ChromaDB
ingest_task = PythonOperator(
task_id='ingest_to_chroma',
python_callable=ingest_to_chroma,
)
# Define task dependencies
fetch_repos_task >> fetch_readmes_task >> ingest_task

View File

@@ -14,12 +14,12 @@ x-airflow-common:
AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session'
AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
_PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
volumes:
- ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
- ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
- ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
- ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
- /home/sam/development/knowledge_service:/opt/airflow/dags/repo:ro
user: "${AIRFLOW_UID:-50000}:0"
depends_on:
&airflow-common-depends-on
@@ -115,23 +115,6 @@ services:
networks:
- ai-mesh
airflow-triggerer:
<<: *airflow-common
command: triggerer
healthcheck:
test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
restart: always
depends_on:
<<: *airflow-common-depends-on
airflow-init:
condition: service_completed_successfully
networks:
- ai-mesh
airflow-init:
<<: *airflow-common
entrypoint: /bin/bash
@@ -158,20 +141,6 @@ services:
networks:
- ai-mesh
airflow-cli:
<<: *airflow-common
profiles:
- debug
environment:
<<: *airflow-common-env
CONNECTION_CHECK_MAX_COUNT: "0"
command:
- bash
- -c
- airflow
networks:
- ai-mesh
volumes:
postgres-db-volume: