Restructure: Move services from root to unified repo
Moved the updated services from the /home/sam/development/ root into aboutme_chat_demo/: - knowledge_service/ (with ChromaDB, gitea_scraper, FastAPI) - langgraph_service/ (with LangGraph agent orchestration) - airflow/ (with DAGs for scheduled ingestion). All services now live in a single repository location; modular docker-compose files are maintained per service. Removed duplicate nested directories. The updated files reflect the latest working versions.
This commit is contained in:
@@ -5,10 +5,8 @@ Runs daily to fetch new/updated repos and ingest into ChromaDB.
|
||||
from datetime import datetime, timedelta
|
||||
from airflow import DAG
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.providers.http.operators.http import SimpleHttpOperator
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
# Add knowledge_service to path for imports
|
||||
sys.path.insert(0, '/opt/airflow/dags/repo')
|
||||
@@ -115,30 +113,26 @@ with DAG(
|
||||
'gitea_daily_ingestion',
|
||||
default_args=default_args,
|
||||
description='Daily ingestion of Gitea repositories into knowledge base',
|
||||
schedule_interval=timedelta(days=1), # Run daily
|
||||
schedule_interval=timedelta(days=1),
|
||||
start_date=datetime(2024, 1, 1),
|
||||
catchup=False,
|
||||
tags=['gitea', 'ingestion', 'knowledge'],
|
||||
) as dag:
|
||||
|
||||
# Task 1: Fetch repository list
|
||||
fetch_repos_task = PythonOperator(
|
||||
task_id='fetch_repos',
|
||||
python_callable=fetch_gitea_repos,
|
||||
)
|
||||
|
||||
# Task 2: Fetch README content
|
||||
fetch_readmes_task = PythonOperator(
|
||||
task_id='fetch_readmes',
|
||||
python_callable=fetch_readmes,
|
||||
)
|
||||
|
||||
# Task 3: Ingest into ChromaDB
|
||||
ingest_task = PythonOperator(
|
||||
task_id='ingest_to_chroma',
|
||||
python_callable=ingest_to_chroma,
|
||||
)
|
||||
|
||||
# Define task dependencies
|
||||
fetch_repos_task >> fetch_readmes_task >> ingest_task
|
||||
|
||||
|
||||
@@ -14,12 +14,12 @@ x-airflow-common:
|
||||
AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
|
||||
AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session'
|
||||
AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
|
||||
_PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
|
||||
volumes:
|
||||
- ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
|
||||
- ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
|
||||
- ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
|
||||
- ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins
|
||||
- /home/sam/development/knowledge_service:/opt/airflow/dags/repo:ro
|
||||
user: "${AIRFLOW_UID:-50000}:0"
|
||||
depends_on:
|
||||
&airflow-common-depends-on
|
||||
@@ -115,23 +115,6 @@ services:
|
||||
networks:
|
||||
- ai-mesh
|
||||
|
||||
airflow-triggerer:
|
||||
<<: *airflow-common
|
||||
command: triggerer
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
start_period: 30s
|
||||
restart: always
|
||||
depends_on:
|
||||
<<: *airflow-common-depends-on
|
||||
airflow-init:
|
||||
condition: service_completed_successfully
|
||||
networks:
|
||||
- ai-mesh
|
||||
|
||||
airflow-init:
|
||||
<<: *airflow-common
|
||||
entrypoint: /bin/bash
|
||||
@@ -158,20 +141,6 @@ services:
|
||||
networks:
|
||||
- ai-mesh
|
||||
|
||||
airflow-cli:
|
||||
<<: *airflow-common
|
||||
profiles:
|
||||
- debug
|
||||
environment:
|
||||
<<: *airflow-common-env
|
||||
CONNECTION_CHECK_MAX_COUNT: "0"
|
||||
command:
|
||||
- bash
|
||||
- -c
|
||||
- airflow
|
||||
networks:
|
||||
- ai-mesh
|
||||
|
||||
volumes:
|
||||
postgres-db-volume:
|
||||
|
||||
|
||||
Reference in New Issue
Block a user