#!/bin/bash
# Scheduled observatory update -- run via cron
# Usage: crontab -e -> 0 6 * * * /path/to/scheduled-update.sh
#
# This script runs the full observatory update cycle:
#   1. Fetch new docs from all enabled sources (IETF, W3C)
#   2. Analyze unrated documents with Claude
#   3. Generate embeddings with Ollama
#   4. Extract ideas from new documents
#   5. Re-run gap analysis if enough new docs
#
# Logs are saved to data/logs/update-YYYYMMDD-HHMMSS.log

set -euo pipefail

# Run from the repository root no matter where cron invokes us from.
cd "$(dirname "$0")/.."

readonly LOG_DIR="data/logs"
mkdir -p "$LOG_DIR"
LOG_FILE="$LOG_DIR/update-$(date +%Y%m%d-%H%M%S).log"
readonly LOG_FILE

echo "Starting scheduled update at $(date)" | tee "$LOG_FILE"

# Load environment (API keys, etc.). 'set -a' auto-exports every variable
# assigned by .env so the Python child process inherits them.
if [ -f .env ]; then
  set -a
  # shellcheck disable=SC1091 -- .env is runtime-local, not in the repo
  source .env
  set +a
fi

# Run the observatory update (delta mode -- only fetch new docs).
#
# BUG FIX: under 'set -e', a failing python command used to abort the
# script *before* EXIT_CODE=$? executed, so the "Completed" line and the
# log cleanup below were silently skipped on every failure (and EXIT_CODE,
# when reached, was always 0). Capturing the status with '||' keeps the
# script alive through its epilogue while preserving the real exit code.
#
# The here-doc delimiter is quoted ('PYEOF') so the shell performs no
# expansion inside the embedded Python source.
EXIT_CODE=0
python - <<'PYEOF' >> "$LOG_FILE" 2>&1 || EXIT_CODE=$?
import sys
sys.path.insert(0, 'src')

from ietf_analyzer.observatory import Observatory
from ietf_analyzer.analyzer import Analyzer
from ietf_analyzer.config import Config
from ietf_analyzer.db import Database

config = Config.load()
db = Database(config)
analyzer = Analyzer(config, db)
obs = Observatory(config, db, analyzer)

try:
    result = obs.update()
    print(f'Results: {result}')
except Exception as e:
    print(f'ERROR: {e}', file=sys.stderr)
    sys.exit(1)
finally:
    db.close()
PYEOF

echo "Completed at $(date) (exit code: $EXIT_CODE)" | tee -a "$LOG_FILE"

# Clean up old logs (keep last 30 days). Cleanup problems must never
# mask the update's own status, hence the intentional '|| true'.
find "$LOG_DIR" -name "update-*.log" -mtime +30 -delete 2>/dev/null || true

exit "$EXIT_CODE"