Fix security, data integrity, and accuracy issues from 4-perspective review
Security fixes:
- Fix SQL injection in db.py:update_generation_run (column name whitelist)
- Flask SECRET_KEY from env var instead of hardcoded
- Add LLM rating bounds validation (_clamp_rating, 1-10)
- Fix JSON extraction trailing whitespace handling

Data integrity:
- Normalize 21 legacy category names to 11 canonical short forms
- Add false_positive column, flag 73 non-AI drafts (361 relevant remain)
- Document verified counts: 434 total/361 relevant drafts, 557 authors, 419 ideas, 11 gaps

Code quality:
- Fix version string 0.1.0 → 0.2.0
- Add close()/context manager to Embedder class
- Dynamic matrix size instead of hardcoded "260x260"

Blog accuracy:
- Fix EU AI Act timeline (enforcement Aug 2026, not "18 months")
- Distinguish OAuth consent from GDPR Einwilligung
- Add EU AI Act Annex III context to hospital scenario
- Add FIPA, eIDAS 2.0 references where relevant

Methodology:
- Add methodology.md documenting pipeline, limitations, rating rubric
- Add LLM-as-judge caveats to analyzer.py
- Document clustering threshold rationale

Reviews from: legal (German/EU law), statistics, development, science perspectives.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -48,7 +48,8 @@ CREATE TABLE IF NOT EXISTS ratings (
|
||||
momentum_note TEXT DEFAULT '',
|
||||
relevance_note TEXT DEFAULT '',
|
||||
categories TEXT DEFAULT '[]', -- JSON array
|
||||
rated_at TEXT
|
||||
rated_at TEXT,
|
||||
false_positive INTEGER DEFAULT 0 -- 1 = flagged as not AI-agent related
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS embeddings (
|
||||
@@ -268,6 +269,11 @@ class Database:
|
||||
if col not in cols:
|
||||
self._conn.execute(f"ALTER TABLE drafts ADD COLUMN {col} {typedef}")
|
||||
|
||||
# ratings table migrations
|
||||
rating_cols = {r[1] for r in self._conn.execute("PRAGMA table_info(ratings)").fetchall()}
|
||||
if "false_positive" not in rating_cols:
|
||||
self._conn.execute("ALTER TABLE ratings ADD COLUMN false_positive INTEGER DEFAULT 0")
|
||||
|
||||
# ideas table migrations
|
||||
idea_cols = {r[1] for r in self._conn.execute("PRAGMA table_info(ideas)").fetchall()}
|
||||
if "novelty_score" not in idea_cols:
|
||||
@@ -1006,10 +1012,17 @@ class Database:
|
||||
self.conn.commit()
|
||||
return cur.lastrowid
|
||||
|
||||
_GENERATION_RUN_COLUMNS = frozenset({
|
||||
"family_name", "gap_ids", "total_input_tokens", "total_output_tokens",
|
||||
"model_used", "status", "started_at", "completed_at",
|
||||
})
|
||||
|
||||
def update_generation_run(self, run_id: int, **kwargs) -> None:
|
||||
sets = []
|
||||
params = []
|
||||
for k, v in kwargs.items():
|
||||
if k not in self._GENERATION_RUN_COLUMNS:
|
||||
raise ValueError(f"Invalid column for generation_runs: {k!r}")
|
||||
sets.append(f"{k} = ?")
|
||||
params.append(v)
|
||||
if not sets:
|
||||
|
||||
Reference in New Issue
Block a user