Major features added by 5 parallel agent teams:

- Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis)
- Global search across drafts, ideas, authors, gaps
- REST API expansion (14 endpoints, up from 3) with CSV/JSON export
- Citation graph visualization (D3.js, 440 nodes, 2422 edges)
- Standards readiness scoring (0-100 composite from 6 factors)
- Side-by-side draft comparison view with shared/unique analysis
- Annotation system (notes + tags per draft, DB-persisted)
- Docker deployment (Dockerfile + docker-compose with Ollama)
- Scheduled updates (cron script with log rotation)
- Pipeline health dashboard (stage progress bars, cost tracking)
- Test suite foundation (54 pytest tests covering DB, models, web data)

Fixes:

- compare_drafts() stubbed→working
- get_authors_for_draft() bug
- source-aware analysis prompts
- config env var overrides + validation
- resilient batch error handling with --retry-failed
- observatory --dry-run

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
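The readiness score mentioned above is described only as a 0-100 composite built from 6 factors. A minimal sketch of that idea, assuming equal weighting and purely illustrative factor names (the real factor set and weights live in the Python codebase):

```shell
#!/bin/bash
# Hypothetical sketch: combine six factor scores (0-100 each) into a
# single 0-100 composite. Factor names, values, and equal weighting
# are illustrative assumptions, not the project's actual formula.
declare -A factors=(
  [maturity]=80 [citations]=60 [author_activity]=70
  [wg_adoption]=90 [revision_cadence]=50 [implementations]=40
)

total=0
for f in "${!factors[@]}"; do
  total=$(( total + factors[$f] ))
done

# Integer mean of the six factors -> composite stays in 0-100.
echo $(( total / ${#factors[@]} ))
```

Because each factor is already on a 0-100 scale, an unweighted mean keeps the composite bounded without extra normalization; a weighted variant would multiply each factor before summing.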
62 lines · 1.5 KiB · Bash · Executable File
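The script below documents its own cron usage. For a non-interactive setup, the schedule line can be generated and appended to an existing crontab; the repository path here is illustrative only. Shown as a dry run that prints the line rather than installing it:

```shell
#!/bin/bash
# Hypothetical non-interactive install of the daily 06:00 job.
# The path is illustrative; adjust to where the repo is checked out.
CRON_LINE='0 6 * * * /opt/observatory/scripts/scheduled-update.sh'
echo "$CRON_LINE"

# To install for real (preserves any existing crontab entries):
#   ( crontab -l 2>/dev/null; printf '%s\n' "$CRON_LINE" ) | crontab -
```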
#!/bin/bash
# Scheduled observatory update -- run via cron
# Usage: crontab -e -> 0 6 * * * /path/to/scheduled-update.sh
#
# This script runs the full observatory update cycle:
# 1. Fetch new docs from all enabled sources (IETF, W3C)
# 2. Analyze unrated documents with Claude
# 3. Generate embeddings with Ollama
# 4. Extract ideas from new documents
# 5. Re-run gap analysis if enough new docs
#
# Logs are saved to data/logs/update-YYYYMMDD-HHMMSS.log

set -euo pipefail

cd "$(dirname "$0")/.."

LOG_DIR="data/logs"
mkdir -p "$LOG_DIR"
LOG_FILE="$LOG_DIR/update-$(date +%Y%m%d-%H%M%S).log"

echo "Starting scheduled update at $(date)" | tee "$LOG_FILE"

# Load environment (API keys, etc.)
if [ -f .env ]; then
    set -a
    source .env
    set +a
fi

# Run the observatory update (delta mode -- only fetch new docs).
# Capture the exit status with `|| EXIT_CODE=$?` so a failure is
# recorded instead of aborting the script under `set -e` (a bare
# `EXIT_CODE=$?` after the command would never run on failure).
EXIT_CODE=0
python -c "
import sys
sys.path.insert(0, 'src')
from ietf_analyzer.observatory import Observatory
from ietf_analyzer.analyzer import Analyzer
from ietf_analyzer.config import Config
from ietf_analyzer.db import Database

config = Config.load()
db = Database(config)
analyzer = Analyzer(config, db)
obs = Observatory(config, db, analyzer)

try:
    result = obs.update()
    print(f'Results: {result}')
except Exception as e:
    print(f'ERROR: {e}', file=sys.stderr)
    sys.exit(1)
finally:
    db.close()
" >> "$LOG_FILE" 2>&1 || EXIT_CODE=$?

echo "Completed at $(date) (exit code: $EXIT_CODE)" | tee -a "$LOG_FILE"

# Clean up old logs (keep last 30 days)
find "$LOG_DIR" -name "update-*.log" -mtime +30 -delete 2>/dev/null || true

exit $EXIT_CODE
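The 30-day rotation rule above can be exercised in isolation. A scratch demo, assuming GNU coreutils (`touch -d`); the file names are invented for illustration:

```shell
#!/bin/bash
# Scratch demo of the rotation rule: logs older than 30 days are
# deleted, recent ones are kept. Requires GNU `touch -d`.
DEMO_DIR=$(mktemp -d)
touch "$DEMO_DIR/update-new.log"                    # mtime = now -> kept
touch -d '40 days ago' "$DEMO_DIR/update-old.log"   # 40 days old -> deleted

find "$DEMO_DIR" -name "update-*.log" -mtime +30 -delete

remaining=$(ls "$DEMO_DIR")
echo "$remaining"

rm -rf "$DEMO_DIR"   # tidy up the scratch directory
```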