From dec8667193e3e493307622be53aca59e6d7eb205 Mon Sep 17 00:00:00 2001 From: Christian Nennemann Date: Sun, 8 Mar 2026 20:35:32 +0100 Subject: [PATCH] Add 6 new analysis pages and 5 CLI reports New web UI pages with Plotly charts: - /sources: cross-source comparison (ratings, categories by standards body) - /false-positives: profiling of 73 false positives (box plots, terms) - /trends: temporal evolution (submissions, ratings, safety ratio over time) - /complexity: draft complexity matrix (correlations, scatter plots) - /idea-analysis: idea novelty deep dive (sunburst, distribution, shared ideas) - /citations: enhanced with influence analysis and BCP dependency tabs New CLI reports (ietf report ): - sources, false-positives, citations, complexity, idea-analysis Co-Authored-By: Claude Opus 4.6 --- data/reports/citations.md | 211 +++++++ data/reports/complexity.md | 2 +- data/reports/false-positives.md | 2 +- data/reports/idea-analysis.md | 146 +++++ data/reports/sources.md | 2 +- src/ietf_analyzer/reports.py | 164 +++-- src/webui/app.py | 36 +- src/webui/data.py | 1017 +++++++++++++++++++++++++++++++ 8 files changed, 1517 insertions(+), 63 deletions(-) create mode 100644 data/reports/citations.md create mode 100644 data/reports/idea-analysis.md diff --git a/data/reports/citations.md b/data/reports/citations.md new file mode 100644 index 0000000..b4727c6 --- /dev/null +++ b/data/reports/citations.md @@ -0,0 +1,211 @@ +# Citation Influence & BCP Dependency Analysis +*Generated 2026-03-08 19:32 UTC — 360 of 761 drafts analyzed, 4231 total references (2443 RFC, 698 draft, 1090 BCP)* + +## Top 20 Most-Cited RFCs + +| # | RFC | Name | Cited By | +|--:|-----|------|--------:| +| 1 | RFC 2119 | Key words (MUST/SHALL/MAY) | 285 drafts | +| 2 | RFC 8174 | Key words update | 237 drafts | +| 3 | RFC 8446 | TLS 1.3 | 42 drafts | +| 4 | RFC 6749 | OAuth 2.0 | 36 drafts | +| 5 | RFC 9110 | HTTP Semantics | 34 drafts | +| 6 | RFC 8126 | | 26 drafts | +| 7 | RFC 8259 | JSON | 26 drafts | +| 8 | RFC 5280 | | 22 drafts | +| 9 | RFC 7519 | JWT | 22 drafts | +| 10 | RFC 9052 | COSE | 20 drafts | +| 11 | RFC 8949 | | 19 drafts | +| 12 | RFC 9528 | | 19 drafts | +| 13 | RFC 8392 | CWT | 18 drafts | +| 14 | RFC 7515 | JWS | 16 drafts | +| 15 | RFC 9000 | | 16 drafts | +| 16 | RFC 4648 | | 14 drafts | +| 17 | RFC 5234 | | 14 drafts | +| 18 | RFC 9334 | | 14 drafts | +| 19 | RFC 3986 | URIs | 13 drafts | +| 20 | RFC 6241 | | 13 drafts | + +## Top 20 Most-Citing Drafts + +Drafts with the highest outgoing reference count. 
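+Rows are ranked by the RFC column; the Total column also counts draft and BCP references. As a rough sketch of how these per-draft tallies can be derived from the `draft_refs` table (helper name and ranking choice are illustrative; assumes `sqlite3.Row` access as in the queries in `src/webui/data.py`):
+
+```
+from collections import Counter
+
+def top_citing(conn, limit=20):
+    """Tally outgoing references per draft, split by type (rfc/draft/bcp)."""
+    by_type = Counter()  # (draft_name, ref_type) -> count
+    totals = Counter()   # draft_name -> total outgoing references
+    for row in conn.execute("SELECT draft_name, ref_type FROM draft_refs"):
+        by_type[(row["draft_name"], row["ref_type"])] += 1
+        totals[row["draft_name"]] += 1
+    # Rank by RFC reference count, as in the table below
+    ranked = sorted(totals, key=lambda d: by_type[(d, "rfc")], reverse=True)
+    return [(d, by_type[(d, "rfc")], by_type[(d, "draft")],
+             by_type[(d, "bcp")], totals[d]) for d in ranked[:limit]]
+```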
+ +| # | Draft | Category | RFCs | Drafts | BCPs | Total | +|--:|-------|----------|-----:|-------:|-----:|------:| +| 1 | draft-templin-6man-aero3 | A2A protocols | 68 | 9 | 5 | 82 | +| 2 | draft-templin-intarea-aero2 | A2A protocols | 68 | 10 | 5 | 83 | +| 3 | draft-templin-intarea-aero | Autonomous netops | 59 | 10 | 4 | 73 | +| 4 | draft-ietf-anima-constrained-voucher | Agent identity/auth | 44 | 14 | 4 | 62 | +| 5 | draft-eggert-mailmaint-uaautoconf | Other | 41 | 2 | 5 | 48 | +| 6 | draft-ietf-anima-brski-prm | Other | 40 | 13 | 5 | 58 | +| 7 | draft-ietf-mailmaint-pacc | Other | 39 | 2 | 5 | 46 | +| 8 | draft-mozleywilliams-dnsop-bandaid | Agent discovery/reg | 37 | 4 | 4 | 45 | +| 9 | draft-ietf-ace-edhoc-oscore-profile | Agent identity/auth | 33 | 14 | 5 | 52 | +| 10 | draft-ietf-lamps-e2e-mail-guidance | Other | 26 | 11 | 4 | 41 | +| 11 | draft-ietf-tls-deprecate-obsolete-kex | Policy/governance | 25 | 1 | 4 | 30 | +| 12 | draft-ietf-ace-coap-est-oscore | Agent identity/auth | 24 | 4 | 3 | 31 | +| 13 | draft-ietf-lake-app-profiles | Data formats/interop | 24 | 6 | 6 | 36 | +| 14 | draft-ietf-emu-eap-edhoc | Other | 20 | 5 | 3 | 28 | +| 15 | draft-sipos-dtn-bp-safe | Other | 20 | 5 | 6 | 31 | +| 16 | draft-ietf-lake-authz | Agent identity/auth | 19 | 4 | 4 | 27 | +| 17 | draft-howe-sipcore-mcp-extension | A2A protocols | 18 | 0 | 4 | 22 | +| 18 | draft-ietf-httpbis-rfc6265bis | Other | 18 | 0 | 4 | 22 | +| 19 | draft-ietf-lamps-rfc5274bis | Other | 18 | 3 | 3 | 24 | +| 20 | draft-ietf-lamps-rfc7030-csrattrs | Data formats/interop | 18 | 0 | 3 | 21 | + +## Influence Score (PageRank-style) + +Drafts ranked by weighted sum of how often their cited RFCs are themselves cited. + +| # | Draft | Category | Out-Degree | Influence Score | +|--:|-------|----------|----------:|---------:| +| 1 | draft-ietf-ace-edhoc-oscore-profile | Agent identity/auth | 52 | 930 | +| 2 | draft-ietf-anima-brski-prm | Other | 58 | 853 | +| 3 | draft-eggert-mailmaint-uaautoconf | Other | 48 | 844 | +| 4 | draft-ietf-mailmaint-pacc | Other | 46 | 842 | +| 5 | draft-ietf-anima-constrained-voucher | Agent identity/auth | 62 | 836 | +| 6 | draft-templin-intarea-aero2 | A2A protocols | 83 | 818 | +| 7 | draft-templin-6man-aero3 | A2A protocols | 82 | 809 | +| 8 | draft-templin-intarea-aero | Autonomous netops | 73 | 796 | +| 9 | draft-ietf-ace-coap-est-oscore | Agent identity/auth | 31 | 772 | +| 10 | draft-birkholz-verifiable-agent-conversations | AI safety/alignment | 24 | 759 | +| 11 | draft-gaikwad-woa | Agent discovery/reg | 22 | 731 | +| 12 | draft-sipos-dtn-bp-safe | Other | 31 | 728 | +| 13 | draft-ietf-lake-authz | Agent identity/auth | 27 | 722 | +| 14 | draft-ietf-lake-app-profiles | Data formats/interop | 36 | 718 | +| 15 | draft-ietf-emu-eap-edhoc | Other | 28 | 715 | +| 16 | draft-meunier-webbotauth-registry | Agent identity/auth | 25 | 693 | +| 17 | draft-ietf-httpbis-rfc6265bis | Other | 22 | 690 | +| 18 | draft-mishra-oauth-agent-grants | Agent identity/auth | 21 | 685 | +| 19 | draft-ravikiran-clawdentity-protocol | Agent identity/auth | 16 | 681 | +| 20 | draft-gaikwad-south-authorization | Agent identity/auth | 11 | 678 | + +## Citation Density by Category + +| Category | Drafts | Total Refs | Avg Refs/Draft | +|:---------|-------:|-----------:|---------------:| +| Data formats/interop | 16 | 266 | 16.6 | +| Other AI/agent | 1 | 15 | 15.0 | +| Agent identity/auth | 58 | 867 | 14.9 | +| Agent discovery/reg | 15 | 218 | 14.5 | +| Other | 116 | 1351 | 11.6 | +| AI safety/alignment | 11 | 118 | 10.7 
| +| A2A protocols | 52 | 553 | 10.6 | +| Policy/governance | 16 | 170 | 10.6 | +| Autonomous netops | 36 | 348 | 9.7 | +| Model serving/inference | 12 | 106 | 8.8 | +| ML traffic mgmt | 24 | 201 | 8.4 | +| Human-agent interaction | 3 | 18 | 6.0 | + +## Most-Referenced Drafts (Draft-to-Draft) + +| # | Draft | Cited By | +|--:|-------|--------:| +| 1 | draft-ietf | 44 drafts | +| 2 | draft-rosenberg-ai-protocols | 14 drafts | +| 3 | draft-ietf-cose | 8 drafts | +| 4 | draft-ietf-moq-transport | 7 drafts | +| 5 | draft-ietf-oauth | 7 drafts | +| 6 | draft-ietf-pquip | 7 drafts | +| 7 | draft-ietf-tls | 7 drafts | +| 8 | draft-ietf-core | 6 drafts | +| 9 | draft-ietf-tls-hybrid-design | 5 drafts | +| 10 | draft-irtf-nmrg-ai-challenges | 5 drafts | +| 11 | draft-ietf-ace | 4 drafts | +| 12 | draft-ietf-aipref | 4 drafts | +| 13 | draft-ietf-lake | 4 drafts | +| 14 | draft-ietf-lake-authz | 4 drafts | +| 15 | draft-ietf-lamps-kyber-certificates | 4 drafts | +| 16 | draft-ietf-moq | 4 drafts | +| 17 | draft-ietf-pquip-pqt-hybrid-terminology | 4 drafts | +| 18 | draft-irtf | 4 drafts | +| 19 | draft-irtf-nmrg | 4 drafts | +| 20 | draft-irtf-nmrg-network-digital | 4 drafts | + +--- + +## BCP Dependency Analysis + +- **36** unique BCPs cited across the corpus +- **1090** total BCP citations +- **360** of 761 drafts (47.3%) cite at least one BCP + +### All BCPs by Citation Count + +| # | BCP | Cited By | Example Drafts | +|--:|-----|--------:|:---------------| +| 1 | BCP 78 | 360 | draft-zhang-agent-gap-network, draft-cui-ai-agent-task, draft-liu-agent-context-protocol +357 more | +| 2 | BCP 79 | 360 | draft-zhang-agent-gap-network, draft-cui-ai-agent-task, draft-liu-agent-context-protocol +357 more | +| 3 | BCP 14 | 278 | draft-zhang-agent-gap-network, draft-cui-ai-agent-task, draft-liu-agent-context-protocol +275 more | +| 4 | BCP 26 | 23 | draft-ietf-sshm-ssh-agent, draft-mzsg-rtgwg-agent-cross-device-comm-framework, draft-mao-rtgwg-agent-comm-protocol-gap-analysis +20 more | +| 5 | BCP 13 | 8 | draft-narvaneni-agent-uri, draft-birkholz-verifiable-agent-conversations, draft-gaikwad-woa +5 more | +| 6 | BCP 205 | 8 | draft-ietf-sshm-ssh-agent, draft-jurkovikj-httpapi-agentic-state, draft-sipos-dtn-bp-safe +5 more | +| 7 | BCP 106 | 7 | draft-ietf-anima-brski-prm, draft-aylward-daap-v2, draft-ra-emu-pqc-eapaka +4 more | +| 8 | BCP 222 | 4 | draft-mozleywilliams-dnsop-dnsaid, draft-eggert-mailmaint-uaautoconf, draft-mozleywilliams-dnsop-bandaid +1 more | +| 9 | BCP 35 | 4 | draft-narvaneni-agent-uri, draft-aylward-aiga-1, draft-aylward-aiga-2 +1 more | +| 10 | BCP 5 | 4 | draft-gaikwad-woa, draft-templin-intarea-aero2, draft-templin-intarea-aero +1 more | +| 11 | BCP 72 | 4 | draft-steele-agent-considerations, draft-sipos-dtn-bp-safe, draft-narajala-ans +1 more | +| 12 | BCP 100 | 2 | draft-ietf-lake-app-profiles, draft-ietf-ace-edhoc-oscore-profile | +| 13 | BCP 153 | 2 | draft-templin-intarea-aero2, draft-templin-6man-aero3 | +| 14 | BCP 195 | 2 | draft-ietf-anima-brski-prm, draft-ietf-tls-deprecate-obsolete-kex | +| 15 | BCP 56 | 2 | draft-gupta-httpapi-events-query, draft-chen-nmrg-semantic-inference-routing | +| 16 | BCP 81 | 2 | draft-ietf-netconf-configuration-tracing, draft-williams-netmod-lm-hierarchy-topology | +| 17 | BCP 126 | 1 | draft-pbs-sidrops-roaanycast | +| 18 | BCP 147 | 1 | draft-khatri-sipcore-call-transfer-fail-response | +| 19 | BCP 152 | 1 | draft-liang-agentdns | +| 20 | BCP 169 | 1 | draft-pbs-sidrops-roaanycast | +| 21 | BCP 185 | 1 | draft-pbs-sidrops-roaanycast | +| 22 
| BCP 199 | 1 | draft-gont-dhcwg-dhcpv6-iids | +| 23 | BCP 215 | 1 | draft-ietf-netconf-configuration-tracing | +| 24 | BCP 219 | 1 | draft-tjw-dbound2-problem-statement | +| 25 | BCP 225 | 1 | draft-mishra-oauth-agent-grants | +| 26 | BCP 232 | 1 | draft-liang-agentdns | +| 27 | BCP 237 | 1 | draft-ietf-dnsop-ds-automation | +| 28 | BCP 240 | 1 | draft-meyerzuselha-oauth-web-message-response-mode | +| 29 | BCP 242 | 1 | draft-templin-6man-mla | +| 30 | BCP 38 | 1 | draft-li-spring-rdma-multicast-over-srv6 | +| 31 | BCP 40 | 1 | draft-liang-agentdns | +| 32 | BCP 51 | 1 | draft-ietf-pim-zeroconf-mcast-addr-alloc-ps | +| 33 | BCP 52 | 1 | draft-ietf-lake-authz | +| 34 | BCP 67 | 1 | draft-howe-sipcore-mcp-extension | +| 35 | BCP 74 | 1 | draft-yang-nmrg-mcp-nm | +| 36 | BCP 95 | 1 | draft-gupta-httpapi-events-query | + +### BCP Usage by Category + +| Category | BCP Refs | Unique BCPs | Top BCPs | +|:---------|--------:|-----------:|:---------| +| Other | 351 | 18 | BCP78(116), BCP79(116), BCP14(94) | +| Agent identity/auth | 177 | 10 | BCP78(58), BCP79(58), BCP14(49) | +| A2A protocols | 157 | 9 | BCP78(52), BCP79(52), BCP14(44) | +| Autonomous netops | 96 | 7 | BCP78(36), BCP79(36), BCP14(20) | +| ML traffic mgmt | 67 | 5 | BCP78(24), BCP79(24), BCP14(17) | +| Data formats/interop | 61 | 11 | BCP78(16), BCP79(16), BCP14(14) | +| Agent discovery/reg | 54 | 12 | BCP78(15), BCP79(15), BCP14(13) | +| Policy/governance | 48 | 8 | BCP78(16), BCP79(16), BCP14(11) | +| Model serving/inference | 38 | 7 | BCP78(12), BCP79(12), BCP14(9) | +| AI safety/alignment | 32 | 7 | BCP78(11), BCP79(11), BCP14(6) | +| Human-agent interaction | 6 | 2 | BCP78(3), BCP79(3) | +| Other AI/agent | 3 | 3 | BCP14(1), BCP78(1), BCP79(1) | + +### Top BCP Co-Citations + +BCP pairs most frequently cited together in the same draft. 
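+The counts come from intersecting, for each unordered BCP pair, the sets of drafts that cite each BCP, mirroring the co-citation loop in `_compute_bcp_analysis` (`src/webui/data.py`). A minimal standalone sketch:
+
+```
+from itertools import combinations
+
+def co_citations(bcp_citations: dict[str, list[str]]) -> list[tuple[str, str, int]]:
+    """bcp_citations maps a BCP id to the list of drafts citing it."""
+    pairs = []
+    for a, b in combinations(sorted(bcp_citations), 2):
+        # Number of drafts citing both BCPs of the pair
+        shared = len(set(bcp_citations[a]) & set(bcp_citations[b]))
+        if shared:
+            pairs.append((a, b, shared))
+    return sorted(pairs, key=lambda p: p[2], reverse=True)
+```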
+ +| BCP A | BCP B | Co-cited in | +|:------|:------|----------:| +| BCP 78 | BCP 79 | 360 drafts | +| BCP 14 | BCP 78 | 278 drafts | +| BCP 14 | BCP 79 | 278 drafts | +| BCP 26 | BCP 78 | 23 drafts | +| BCP 26 | BCP 79 | 23 drafts | +| BCP 14 | BCP 26 | 22 drafts | +| BCP 13 | BCP 14 | 8 drafts | +| BCP 13 | BCP 78 | 8 drafts | +| BCP 13 | BCP 79 | 8 drafts | +| BCP 14 | BCP 205 | 8 drafts | +| BCP 205 | BCP 78 | 8 drafts | +| BCP 205 | BCP 79 | 8 drafts | +| BCP 106 | BCP 14 | 7 drafts | +| BCP 106 | BCP 78 | 7 drafts | +| BCP 106 | BCP 79 | 7 drafts | \ No newline at end of file diff --git a/data/reports/complexity.md b/data/reports/complexity.md index 8779886..476dbbe 100644 --- a/data/reports/complexity.md +++ b/data/reports/complexity.md @@ -1,5 +1,5 @@ # Draft Complexity Matrix -*Generated 2026-03-08 18:05 UTC — 688 rated drafts (57.6% have page data)* +*Generated 2026-03-08 19:32 UTC — 688 rated drafts (57.6% have page data)* ## Correlation Matrix diff --git a/data/reports/false-positives.md b/data/reports/false-positives.md index d5a4b96..c578fc0 100644 --- a/data/reports/false-positives.md +++ b/data/reports/false-positives.md @@ -1,5 +1,5 @@ # False Positive Profile Report -*Generated 2026-03-08 18:04 UTC* +*Generated 2026-03-08 19:32 UTC* ## Overview diff --git a/data/reports/idea-analysis.md b/data/reports/idea-analysis.md new file mode 100644 index 0000000..687aa0e --- /dev/null +++ b/data/reports/idea-analysis.md @@ -0,0 +1,146 @@ +# Idea Novelty Deep Dive +*Generated 2026-03-08 19:32 UTC — 742 ideas, 365 scored, avg novelty 3.02* + +**Embedding coverage**: 419/742 (56.5%) + +## Novelty Score Distribution + +| Score | Count | Bar | +|------:|------:|-----| +| 1 | 0 | | +| 2 | 95 | ▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃ | +| 3 | 167 | ▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅ | +| 4 | 102 | ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ | +| 5 | 1 | █ | + +## Ideas by Type + +| Type | Count | Avg Novelty | +|------|------:|------------:| +| architecture | 199 | 3.01 | +| protocol | 175 | 3.16 | +| requirement | 102 | 2.73 | +| mechanism | 90 | 3.22 | +| pattern | 81 | 2.81 | +| extension | 73 | 2.98 | +| framework | 18 | 3.33 | +| format | 1 | 2.00 | +| process | 1 | 0.00 | +| guidance | 1 | 0.00 | +| methodology | 1 | 0.00 | + +## Top 20 Most Novel Ideas + +| # | Score | Idea | Type | Draft | +|--:|------:|------|------|-------| +| 1 | 5 | Real-Virtual Agent Protocol (RVP) | protocol | [zhang-rvp-problem-statement](https://datatracker.ietf.org/doc/draft-zhang-rvp-problem-statement/) | +| 2 | 4 | AI-Disclosure HTTP Response Header | mechanism | [abaris-aicdh](https://datatracker.ietf.org/doc/draft-abaris-aicdh/) | +| 3 | 4 | Federated AI Service Operations Model | pattern | [aft-ai-traffic](https://datatracker.ietf.org/doc/draft-aft-ai-traffic/) | +| 4 | 4 | Federated AI Service Operations Model | pattern | [ai-traffic](https://datatracker.ietf.org/doc/draft-ai-traffic/) | +| 5 | 4 | AI-Native Network Protocol (AINP) | protocol | [ainp-protocol](https://datatracker.ietf.org/doc/draft-ainp-protocol/) | +| 6 | 4 | Early Retransmit Mechanism | mechanism | [allman-tcp-early-rexmt](https://datatracker.ietf.org/doc/draft-allman-tcp-early-rexmt/) | +| 7 | 4 | Bitcoin-Anchored AGI Identity Protocol | protocol | [architect-cittamarket](https://datatracker.ietf.org/doc/draft-architect-cittamarket/) | +| 8 | 4 | Tiered Risk-Based Governance for Autonomous AI Age | architecture | [aylward-aiga-1](https://datatracker.ietf.org/doc/draft-aylward-aiga-1/) | +| 9 | 4 | Tiered Risk-Based Governance 
for Autonomous AI Age | architecture | [aylward-aiga-2](https://datatracker.ietf.org/doc/draft-aylward-aiga-2/) | +| 10 | 4 | Distributed AI Accountability Protocol | protocol | [aylward-daap-v2](https://datatracker.ietf.org/doc/draft-aylward-daap-v2/) | +| 11 | 4 | Post-Discovery Authorization Handshake | protocol | [barney-caam](https://datatracker.ietf.org/doc/draft-barney-caam/) | +| 12 | 4 | Zero Trust Runtime Agent Architecture | architecture | [berlinai-vera](https://datatracker.ietf.org/doc/draft-berlinai-vera/) | +| 13 | 4 | Evidence-based Autonomy Maturity Model | mechanism | [berlinai-vera](https://datatracker.ietf.org/doc/draft-berlinai-vera/) | +| 14 | 4 | Asynchronous Remote Key Generation Algorithm | protocol | [bradleylundberg-cfrg-arkg](https://datatracker.ietf.org/doc/draft-bradleylundberg-cfrg-arkg/) | +| 15 | 4 | AI Inference Fabric Benchmarking Methodology | mechanism | [calabria-bmwg-ai-fabric-inference-b](https://datatracker.ietf.org/doc/draft-calabria-bmwg-ai-fabric-inference-bench/) | +| 16 | 4 | Agentic Hypercall Protocol | pattern | [campbell-agentic-http](https://datatracker.ietf.org/doc/draft-campbell-agentic-http/) | +| 17 | 4 | Intent-Based Just-in-Time Authorization | architecture | [chen-agent-decoupled-authorization-](https://datatracker.ietf.org/doc/draft-chen-agent-decoupled-authorization-model/) | +| 18 | 4 | Semantic Inference Routing Protocol | protocol | [chen-nmrg-semantic-inference-routin](https://datatracker.ietf.org/doc/draft-chen-nmrg-semantic-inference-routing/) | +| 19 | 4 | Structured OAuth Scope Syntax for Agent Permission | extension | [chen-oauth-scope-agent-extensions](https://datatracker.ietf.org/doc/draft-chen-oauth-scope-agent-extensions/) | +| 20 | 4 | Cosmos Protocol: Trust-Native Identity and Communi | protocol | [cosmos-protocol-specification](https://datatracker.ietf.org/doc/draft-cosmos-protocol-specification/) | + +## Ideas per Draft + +| Ideas/Draft | Drafts | +|------------:|-------:| +| 1 | 563 | +| 2 | 82 | +| 3 | 5 | + +### Most Prolific Drafts + +| Draft | Ideas | Score | +|-------|------:|------:| +| cui-ai-agent-discovery-invocation | 3 | 3.85 | +| duda-dnsop-dns-did | 3 | -- | +| kartha-internet20-ainative | 3 | 3.45 | +| melnikov-sasl2 | 3 | 3.70 | +| iso-iec-pwi-26200 | 3 | 3.20 | +| aft-ai-traffic | 2 | 3.05 | +| ahn-nmrg-5g-security-i2nsf-framework | 2 | 3.50 | +| ahn-opsawg-5g-security-i2nsf-framework | 2 | 3.25 | +| ai-traffic | 2 | -- | +| an-nmrg-i2icf-cits | 2 | 3.70 | + +## Shared Ideas (86 ideas in 2+ drafts) + +| Idea | Appearances | Drafts | +|------|------------:|--------| +| MCP Extensions for Network Equipment Management | 5 | draft-zeng-mcp-network-mgmt, draft-zeng-mcp-troubleshooting, draft-zw-nmrg-mcp-network-mgmt, draft-zw-opsawg-mcp-network-mgmt, draft-zw-rtgwg-mcp-network-mgmt | +| IT Trustworthiness Ontology Framework | 5 | iso-iec-awi-31310-1, iso-iec-pwi-9991, iso-iec-ts-30149-2024, iso-iec-ts-5723-2022, nist-ai-100-6 | +| Compliance Check Communication Protocol for Fee Collection | 4 | iso-12813-2015, iso-12813-2019, iso-12813-2024, iso-ts-12813-2009 | +| Hybrid Post-Quantum Cryptography for EAP-AKA' | 3 | draft-ar-emu-hybrid-pqc-eapaka, draft-ar-emu-pqc-eapaka, draft-ietf-emu-hybrid-pqc-eapaka | +| AC/TE YANG Models for Edge AI Placement | 3 | draft-dunbar-neotec-ac-te-applicability, draft-dunbar-onions-ac-te-applicability, draft-dunbar-onsen-ac-te-applicability | +| Multi-Agent Communication Framework for AIOps | 3 | draft-fu-nmop-agent-communication-framework, 
draft-liu-agent-protocol-over-moq, draft-rosenberg-ai-protocols | +| Distributed AI Inference Architecture | 3 | draft-hong-nmrg-ai-deploy, draft-irtf-nmrg-ai-deploy, draft-song-rtgwg-din-usecases-requirements | +| AI Agent Protocol Framework | 3 | draft-hw-protocol-agent, draft-rosenberg-aiproto-framework, draft-zyyhl-agent-networks-framework | +| PSK authentication method for EDHOC | 3 | draft-ietf-lake-edhoc-psk, draft-lake-pocero-authkem-edhoc, draft-pocero-authkem-edhoc | +| LLM-Assisted Network Management Framework | 3 | draft-irtf-nmrg-llm-nm, draft-zeng-mcp-network-measurement, draft-zm-rtgwg-mcp-network-measurement | +| Model Context Protocol over MOQT | 3 | draft-jennings-ai-mcp-over-moq, draft-jennings-mcp-over-moqt, draft-mcp-over-moqt | +| Micro Agent Communication Protocol (µACP) | 3 | draft-mallick-muacp, draft-stephan-ai-agent-6g, draft-zhang-rtgwg-ai-agents-measurement | +| Agent Gateway-Based Networking Architecture | 3 | draft-men-rtgwg-agent-networking-digibank-scenario, draft-zl-agents-networking-architecture, iso-iec-pwi-5096 | +| Automatic Extended Route Optimization over OMNI Interfaces | 3 | draft-templin-6man-aero3, draft-templin-intarea-aero, draft-templin-intarea-aero2 | +| Localization Augmentation Communication Protocol | 3 | iso-13141-2015, iso-13141-2024, iso-ts-13141-2010 | +| Supply Chain Trustworthiness Framework | 3 | iso-22373-2025, iso-iec-cd-11034.2, iso-iec-tr-24028-2020 | +| Trustworthy electronically stored information system | 3 | iso-fdis-15801, iso-np-15801, iso-tr-15801-2017 | +| A2A Protocol Transport over MOQT | 2 | draft-a2a-moqt-transport, draft-nandakumar-a2a-moqt-transport | +| Agent Authorization Profile for OAuth 2.0 | 2 | draft-aap-oauth-profile, draft-patwhite-aauth | +| AI Traffic Characterization Framework | 2 | draft-aft-ai-traffic, draft-ai-traffic | +| Federated AI Service Operations Model | 2 | draft-aft-ai-traffic, draft-ai-traffic | +| Intent-Based Security Policy Translation for 5G | 2 | draft-ahn-nmrg-5g-security-i2nsf-framework, draft-ahn-opsawg-5g-security-i2nsf-framework | +| Distributed Policy Enforcement via NEF and IBN Controllers | 2 | draft-ahn-nmrg-5g-security-i2nsf-framework, draft-ahn-opsawg-5g-security-i2nsf-framework | +| Tiered Risk-Based Governance for Autonomous AI Agents | 2 | draft-aylward-aiga-1, draft-aylward-aiga-2 | +| Integrated Sensing and Communications for CATS | 2 | draft-bernardos-cats-isac-uc, draft-bernardos-green-isac-uc | +| AI Inference Fabric Benchmarking Methodology | 2 | draft-calabria-bmwg-ai-fabric-inference-bench, draft-gaikwad-llm-benchmarking-methodology | +| Agentic Hypercall Protocol | 2 | draft-campbell-agentic-http, draft-kotecha-agentic-dispute-protocol | +| Intent-Based Network Management Architecture | 2 | draft-chen-nmrg-ibn-management, itu-t-y-3179 | +| Cross-Domain Agent Interoperability Framework | 2 | draft-cui-dmsc-agent-cdi, draft-liu-saag-zt-problem-statement | +| AI Agent Communication Network Requirements | 2 | draft-du-ai-agent-communication-6g-aspect, draft-jiang-cats-reference-acn | + +## Ideas by Type x Source + +| Type | etsi | ietf | iso | itu | nist | w3c | +|------|-----:|-----:|-----:|-----:|-----:|-----:| +| architecture | 3 | 141 | 39 | 11 | 4 | 1 | +| protocol | 1 | 129 | 40 | 2 | 3 | 0 | +| requirement | 4 | 68 | 26 | 2 | 0 | 2 | +| mechanism | 1 | 69 | 18 | 0 | 2 | 0 | +| pattern | 1 | 52 | 23 | 2 | 3 | 0 | +| extension | 0 | 71 | 2 | 0 | 0 | 0 | +| framework | 0 | 5 | 12 | 1 | 0 | 0 | +| format | 0 | 1 | 0 | 0 | 0 | 0 | +| process | 0 | 0 | 1 | 0 | 0 | 
0 |
+| guidance | 0 | 1 | 0 | 0 | 0 | 0 |
+| methodology | 0 | 0 | 1 | 0 | 0 | 0 |
+
+## Correlation: Idea Novelty vs Draft Relevance
+
+Pearson r = **0.099** (n=267 drafts with both scores)
+
+Weak positive correlation: little to no linear relationship between idea novelty and draft relevance.
+
+## Embedding Status
+
+419 of 742 ideas (56.5%) have embeddings.
+To complete the remaining 323 embeddings, run:
+
+```
+ietf embed-ideas
+```
+
+This requires Ollama running locally with the configured embedding model.
\ No newline at end of file
diff --git a/data/reports/sources.md b/data/reports/sources.md
index c61f22b..bffa948 100644
--- a/data/reports/sources.md
+++ b/data/reports/sources.md
@@ -1,5 +1,5 @@
 # Cross-Source Comparison Report
-*Generated 2026-03-08 18:04 UTC — 761 drafts across 6 sources*
+*Generated 2026-03-08 19:32 UTC — 761 drafts across 6 sources*
 
 ## Summary
 
diff --git a/src/ietf_analyzer/reports.py b/src/ietf_analyzer/reports.py
index 6d598a3..2a4e833 100644
--- a/src/ietf_analyzer/reports.py
+++ b/src/ietf_analyzer/reports.py
@@ -951,39 +951,124 @@ class Reporter:
         return str(path)
 
     def trends_report(self) -> str:
-        """Generate category trend analysis report with monthly breakdown and growth rates."""
+        """Generate full temporal evolution report with monthly stats, ratings, safety ratio, and growth."""
         now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
+        conn = self.db.conn
         pairs = self.db.drafts_with_ratings(limit=500)
         all_drafts = self.db.list_drafts(limit=500, order_by="time ASC")
         total = len(all_drafts)
         rating_map = {draft.name: rating for draft, rating in pairs}
 
-        # Monthly counts per category
-        monthly: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
+        # Monthly submission counts by source
+        source_monthly: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
+        for d in all_drafts:
+            month = d.time[:7] if d.time else "unknown"
+            if month != "unknown":
+                src = getattr(d, "source", "ietf") or "ietf"
+                source_monthly[month][src] += 1
+
+        # Monthly category counts
+        cat_monthly: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
         all_cats: set[str] = set()
         for d in all_drafts:
             month = d.time[:7] if d.time else "unknown"
             r = rating_map.get(d.name)
-            if r:
+            if r and month != "unknown":
                 for c in r.categories:
-                    monthly[month][c] += 1
+                    cat_monthly[month][c] += 1
                     all_cats.add(c)
 
-        months = sorted(m for m in monthly.keys() if m != "unknown")
+        months = sorted(m for m in set(list(source_monthly.keys()) + list(cat_monthly.keys())) if m != "unknown")
         cats = sorted(all_cats)
 
+        # Monthly average ratings
+        rating_monthly: dict[str, dict[str, list[int]]] = defaultdict(lambda: defaultdict(list))
+        for d in all_drafts:
+            month = d.time[:7] if d.time else "unknown"
+            r = rating_map.get(d.name)
+            if r and month != "unknown":
+                for dim in ("novelty", "maturity", "overlap", "momentum", "relevance"):
+                    rating_monthly[month][dim].append(getattr(r, dim))
+
+        # Safety vs capability categories (kept in sync with SAFETY_CATEGORIES /
+        # CAPABILITY_CATEGORIES in src/webui/data.py, matching the corpus taxonomy)
+        safety_cats = {"AI safety/alignment", "Agent identity/auth", "Policy/governance"}
+        capability_cats = {"A2A protocols", "Agent discovery/reg", "Autonomous netops",
+                           "Data formats/interop", "Human-agent interaction", "Model serving/inference"}
+
+        # Monthly new authors
+        author_rows = conn.execute("""
+            SELECT da.person_id, MIN(substr(d.time, 1, 7)) AS first_month
+            FROM draft_authors da
+            JOIN drafts d ON da.draft_name = d.name
+            WHERE d.time IS NOT NULL AND d.time != ''
+            GROUP BY da.person_id
+        
""").fetchall() + new_author_monthly: dict[str, int] = defaultdict(int) + for r in author_rows: + if r["first_month"]: + new_author_monthly[r["first_month"]] += 1 + + # Cumulative idea counts + idea_rows = conn.execute(""" + SELECT substr(d.time, 1, 7) AS month, COUNT(i.id) AS cnt + FROM ideas i + JOIN drafts d ON i.draft_name = d.name + WHERE d.time IS NOT NULL AND d.time != '' + GROUP BY month ORDER BY month + """).fetchall() + idea_cumulative = {} + running = 0 + for r in idea_rows: + running += r["cnt"] + idea_cumulative[r["month"]] = running + + def _trend(val, prev_val): + if prev_val is None: + return "" + if val > prev_val: + return " \u2191" + elif val < prev_val: + return " \u2193" + return " \u2192" + lines = [ - "# Category Trend Analysis", - f"*Generated {now} — {total} drafts, {len(months)} months, {len(cats)} categories*\n", + "# Temporal Evolution Report", + f"*Generated {now} \u2014 {total} drafts, {len(months)} months*\n", ] - # Growth summary + # Monthly stats table + lines.extend([ + "## Monthly Overview\n", + "| Month | Submissions | New Authors | Cum. Ideas | Avg Novelty | Avg Maturity | Avg Relevance | Safety Ratio |", + "|-------|------------:|------------:|-----------:|------------:|-------------:|--------------:|-------------:|", + ]) + prev_total = None + for month in months: + total_sub = sum(source_monthly[month].values()) + new_auth = new_author_monthly.get(month, 0) + cum_ideas = idea_cumulative.get(month, 0) + dims = rating_monthly.get(month, {}) + avg_n = sum(dims.get("novelty", [0])) / max(len(dims.get("novelty", [1])), 1) + avg_m = sum(dims.get("maturity", [0])) / max(len(dims.get("maturity", [1])), 1) + avg_r = sum(dims.get("relevance", [0])) / max(len(dims.get("relevance", [1])), 1) + safety = sum(cat_monthly[month].get(c, 0) for c in safety_cats) + capability = sum(cat_monthly[month].get(c, 0) for c in capability_cats) + ratio = f"{safety / capability:.2f}" if capability > 0 else "-" + trend = _trend(total_sub, prev_total) + prev_total = total_sub + lines.append( + f"| {month} | {total_sub}{trend} | {new_auth} | {cum_ideas} | " + f"{avg_n:.1f} | {avg_m:.1f} | {avg_r:.1f} | {ratio} |" + ) + + # Category growth summary recent_months = months[-3:] if len(months) >= 3 else months prev_months = months[-6:-3] if len(months) >= 6 else [] lines.extend([ - "## Growth Summary\n", + "\n## Category Growth Summary\n", "| Category | Total | Last 3mo | Prev 3mo | Growth |", "|----------|------:|---------:|---------:|-------:|", ]) @@ -991,12 +1076,12 @@ class Reporter: cumulative: dict[str, int] = defaultdict(int) for month in months: for cat in cats: - cumulative[cat] += monthly[month].get(cat, 0) + cumulative[cat] += cat_monthly[month].get(cat, 0) for cat in cats: total_cat = cumulative[cat] - recent = sum(monthly[m].get(cat, 0) for m in recent_months) - prev = sum(monthly[m].get(cat, 0) for m in prev_months) if prev_months else 0 + recent = sum(cat_monthly[m].get(cat, 0) for m in recent_months) + prev = sum(cat_monthly[m].get(cat, 0) for m in prev_months) if prev_months else 0 if prev > 0: growth_str = f"{((recent - prev) / prev) * 100:+.0f}%" elif recent > 0: @@ -1005,44 +1090,41 @@ class Reporter: growth_str = "-" lines.append(f"| {cat} | {total_cat} | {recent} | {prev if prev_months else '-'} | {growth_str} |") - # Monthly detail table - lines.extend(["\n## Monthly Breakdown\n"]) - header = "| Month |" + " | ".join(f" {c[:15]}" for c in cats) + " | Total |" - sep = "|-------|" + " | ".join("---:" for _ in cats) + " | -----:|" - lines.append(header) - 
lines.append(sep) - - for month in months: - counts = [str(monthly[month].get(c, 0)) for c in cats] - month_total = sum(monthly[month].values()) - lines.append(f"| {month} | " + " | ".join(counts) + f" | {month_total} |") - - # Half-over-half comparison + # Fastest growing categories (early vs late half) if len(months) >= 4: mid = len(months) // 2 early = months[:mid] late = months[mid:] - - lines.extend([ - "\n## Fastest Growing Categories (early vs late half)\n", - ]) - + lines.extend(["\n## Fastest Growing Categories (early vs late half)\n"]) growth_data = [] for cat in cats: - e = sum(monthly[m].get(cat, 0) for m in early) - l = sum(monthly[m].get(cat, 0) for m in late) + e = sum(cat_monthly[m].get(cat, 0) for m in early) + l_val = sum(cat_monthly[m].get(cat, 0) for m in late) if e > 0: - pct = ((l - e) / e) * 100 - growth_data.append((cat, pct, e, l)) - elif l > 0: - growth_data.append((cat, float("inf"), e, l)) - + pct = ((l_val - e) / e) * 100 + growth_data.append((cat, pct, e, l_val)) + elif l_val > 0: + growth_data.append((cat, float("inf"), e, l_val)) growth_data.sort(key=lambda x: x[1], reverse=True) - for cat, pct, e, l in growth_data: + for cat, pct, e, l_val in growth_data: if pct == float("inf"): - lines.append(f"- **{cat}**: new (0 -> {l} drafts)") + lines.append(f"- **{cat}**: new (0 \u2192 {l_val} drafts)") else: - lines.append(f"- **{cat}**: {pct:+.0f}% ({e} -> {l} drafts)") + lines.append(f"- **{cat}**: {pct:+.0f}% ({e} \u2192 {l_val} drafts)") + + # Rating trends + lines.extend(["\n## Rating Dimension Trends\n"]) + if len(months) >= 2: + first_half = months[:len(months) // 2] + second_half = months[len(months) // 2:] + for dim in ("novelty", "maturity", "overlap", "momentum", "relevance"): + early_vals = [v for m in first_half for v in rating_monthly.get(m, {}).get(dim, [])] + late_vals = [v for m in second_half for v in rating_monthly.get(m, {}).get(dim, [])] + early_avg = sum(early_vals) / len(early_vals) if early_vals else 0 + late_avg = sum(late_vals) / len(late_vals) if late_vals else 0 + diff = late_avg - early_avg + arrow = "\u2191" if diff > 0.1 else ("\u2193" if diff < -0.1 else "\u2192") + lines.append(f"- **{dim.capitalize()}**: {early_avg:.2f} \u2192 {late_avg:.2f} ({diff:+.2f}) {arrow}") report = "\n".join(lines) path = self.output_dir / "trends.md" diff --git a/src/webui/app.py b/src/webui/app.py index 0117cac..d71107c 100644 --- a/src/webui/app.py +++ b/src/webui/app.py @@ -735,6 +735,21 @@ def api_bcp_analysis(): return jsonify(get_bcp_analysis(db())) +# ── Idea Analysis ──────────────────────────────────────────────────────── + + +@app.route("/idea-analysis") +def idea_analysis(): + data = get_idea_analysis(db()) + return render_template("idea_analysis.html", data=data) + + +@app.route("/api/idea-analysis") +def api_idea_analysis(): + data = get_idea_analysis(db()) + return jsonify(data) + + # ── Trends & Complexity ────────────────────────────────────────────────── @@ -752,29 +767,12 @@ def complexity(): @app.route("/api/trends") def api_trends(): - data = get_trends_data(db()) - return jsonify(data) + return jsonify(get_trends_data(db())) @app.route("/api/complexity") def api_complexity(): - data = get_complexity_data(db()) - return jsonify(data) - - -# ── Idea Analysis ──────────────────────────────────────────────────────── - - -@app.route("/idea-analysis") -def idea_analysis(): - data = get_idea_analysis(db()) - return render_template("idea_analysis.html", data=data) - - -@app.route("/api/idea-analysis") -def api_idea_analysis(): - data = 
get_idea_analysis(db()) - return jsonify(data) + return jsonify(get_complexity_data(db())) if __name__ == "__main__": diff --git a/src/webui/data.py b/src/webui/data.py index 1fe8724..319f48c 100644 --- a/src/webui/data.py +++ b/src/webui/data.py @@ -2912,6 +2912,11 @@ def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool return searcher.ask(question, top_k=top_k, cheap=cheap) +SAFETY_CATEGORIES = {"AI safety/alignment", "Agent identity/auth", "Policy/governance"} +CAPABILITY_CATEGORIES = {"A2A protocols", "Agent discovery/reg", "Autonomous netops", + "Data formats/interop", "Human-agent interaction", "Model serving/inference"} + + def get_trends_data(db: Database) -> dict: """Return temporal evolution data for the /trends page. @@ -3315,3 +3320,1015 @@ def get_complexity_data(db: Database) -> dict: "category_complexity": category_complexity, "source_complexity": source_complexity, } + + +# ── Additional Analysis Functions ──────────────────────────────────── + +def get_idea_analysis(db: Database) -> dict: + """Return comprehensive idea analysis data for the idea-analysis page. + + Includes novelty distribution, type breakdown with avg novelty, + top novel ideas, ideas-per-draft distribution, cross-tab of type x source, + shared ideas across drafts, and idea novelty vs draft rating correlation. + """ + from collections import Counter, defaultdict + from difflib import SequenceMatcher + + # Fetch raw data + all_ideas = db.conn.execute( + """SELECT i.id, i.draft_name, i.title, i.description, i.idea_type, + i.novelty_score + FROM ideas i ORDER BY i.novelty_score DESC NULLS LAST""" + ).fetchall() + all_ideas = [dict(r) for r in all_ideas] + + # Draft ratings lookup + ratings_rows = db.conn.execute( + """SELECT d.name, d.title as draft_title, d.source, + r.novelty AS r_novelty, r.maturity, r.overlap, r.momentum, r.relevance + FROM drafts d LEFT JOIN ratings r ON d.name = r.draft_name""" + ).fetchall() + draft_info = {} + for r in ratings_rows: + row = dict(r) + # Compute composite score (average of 5 dimensions) + dims = [row.get("r_novelty"), row.get("maturity"), row.get("overlap"), + row.get("momentum"), row.get("relevance")] + valid = [d for d in dims if d is not None] + row["composite_score"] = sum(valid) / len(valid) if valid else None + draft_info[row["name"]] = row + + total = len(all_ideas) + scored = [i for i in all_ideas if i.get("novelty_score") is not None] + unscored = total - len(scored) + avg_novelty = sum(i["novelty_score"] for i in scored) / len(scored) if scored else 0 + + # Embedding coverage + embed_count = db.conn.execute("SELECT COUNT(*) FROM idea_embeddings").fetchone()[0] + + # --- Novelty score distribution (histogram) --- + novelty_dist = Counter(i["novelty_score"] for i in scored) + novelty_histogram = { + "labels": [1, 2, 3, 4, 5], + "values": [novelty_dist.get(s, 0) for s in [1, 2, 3, 4, 5]], + } + + # --- Ideas by type with counts and avg novelty --- + type_data = defaultdict(lambda: {"count": 0, "novelty_sum": 0, "novelty_n": 0}) + for idea in all_ideas: + t = idea.get("idea_type") or "other" + type_data[t]["count"] += 1 + if idea.get("novelty_score") is not None: + type_data[t]["novelty_sum"] += idea["novelty_score"] + type_data[t]["novelty_n"] += 1 + + by_type = [] + for t, d in sorted(type_data.items(), key=lambda x: x[1]["count"], reverse=True): + avg = d["novelty_sum"] / d["novelty_n"] if d["novelty_n"] > 0 else 0 + by_type.append({"type": t, "count": d["count"], "avg_novelty": round(avg, 2)}) + + type_names = [t["type"] for t in 
by_type] + + # --- Top 20 most novel ideas (score 4-5) --- + top_novel = [] + for idea in all_ideas: + if idea.get("novelty_score") and idea["novelty_score"] >= 4: + di = draft_info.get(idea["draft_name"], {}) + top_novel.append({ + "title": idea["title"], + "description": idea["description"], + "type": idea.get("idea_type", "other"), + "novelty_score": idea["novelty_score"], + "draft_name": idea["draft_name"], + "draft_title": di.get("draft_title", ""), + "draft_score": di.get("composite_score"), + }) + top_novel.sort(key=lambda x: (x["novelty_score"], x.get("draft_score") or 0), reverse=True) + top_novel = top_novel[:20] + + # --- Ideas per draft distribution --- + ideas_per_draft = Counter(i["draft_name"] for i in all_ideas) + ipd_dist = Counter(ideas_per_draft.values()) + ideas_per_draft_hist = { + "labels": sorted(ipd_dist.keys()), + "values": [ipd_dist[k] for k in sorted(ipd_dist.keys())], + } + # Also top drafts by idea count + top_idea_drafts = [] + for name, count in ideas_per_draft.most_common(10): + di = draft_info.get(name, {}) + top_idea_drafts.append({ + "name": name, + "draft_title": di.get("draft_title", ""), + "idea_count": count, + "score": di.get("composite_score"), + }) + + # --- Cross-tabulation: idea_type x source --- + type_source = defaultdict(lambda: defaultdict(int)) + for idea in all_ideas: + t = idea.get("idea_type") or "other" + di = draft_info.get(idea["draft_name"], {}) + source = di.get("source", "ietf") or "ietf" + type_source[t][source] += 1 + + sources = sorted(set( + di.get("source", "ietf") or "ietf" for di in draft_info.values() + )) + cross_tab = [] + for t in type_names: + row = {"type": t} + for s in sources: + row[s] = type_source[t].get(s, 0) + cross_tab.append(row) + + # --- Shared ideas across drafts --- + idea_groups: list[dict] = [] + for idea in all_ideas: + title_lower = idea["title"].lower().strip() + matched = False + for group in idea_groups: + ratio = SequenceMatcher(None, title_lower, group["canonical"]).ratio() + if ratio >= 0.75: + group["ideas"].append(idea) + group["drafts"].add(idea["draft_name"]) + matched = True + break + if not matched: + idea_groups.append({ + "canonical": title_lower, + "title": idea["title"], + "ideas": [idea], + "drafts": {idea["draft_name"]}, + }) + + shared_ideas = [] + for g in sorted(idea_groups, key=lambda x: len(x["drafts"]), reverse=True): + if len(g["drafts"]) < 2: + break + shared_ideas.append({ + "title": g["title"], + "appearances": len(g["drafts"]), + "drafts": sorted(g["drafts"])[:8], + "types": list(set(i.get("idea_type", "other") for i in g["ideas"])), + }) + + # --- Scatter: draft avg idea novelty vs draft relevance --- + draft_idea_novelty = defaultdict(list) + for idea in scored: + draft_idea_novelty[idea["draft_name"]].append(idea["novelty_score"]) + + scatter_data = [] + for name, scores in draft_idea_novelty.items(): + di = draft_info.get(name, {}) + if di.get("relevance") is not None and di.get("composite_score") is not None: + scatter_data.append({ + "name": name, + "avg_idea_novelty": round(sum(scores) / len(scores), 2), + "relevance": di["relevance"], + "score": di["composite_score"], + "idea_count": len(scores), + "source": di.get("source", "ietf") or "ietf", + }) + + # --- Sunburst data: type -> novelty band --- + sunburst_labels = [] + sunburst_parents = [] + sunburst_values = [] + # Root + sunburst_labels.append("All Ideas") + sunburst_parents.append("") + sunburst_values.append(total) + + novelty_bands = {"High (4-5)": lambda s: s is not None and s >= 4, + "Medium (3)": lambda 
s: s is not None and s == 3, + "Low (1-2)": lambda s: s is not None and s <= 2, + "Unscored": lambda s: s is None} + + for t_info in by_type: + t = t_info["type"] + sunburst_labels.append(t) + sunburst_parents.append("All Ideas") + sunburst_values.append(t_info["count"]) + # Sub-bands + type_ideas = [i for i in all_ideas if (i.get("idea_type") or "other") == t] + for band, fn in novelty_bands.items(): + cnt = sum(1 for i in type_ideas if fn(i.get("novelty_score"))) + if cnt > 0: + sunburst_labels.append(f"{t} - {band}") + sunburst_parents.append(t) + sunburst_values.append(cnt) + + return { + "total": total, + "scored": len(scored), + "unscored": unscored, + "avg_novelty": round(avg_novelty, 2), + "embed_count": embed_count, + "embed_pct": round(embed_count / total * 100, 1) if total > 0 else 0, + "type_count": len(by_type), + "novelty_histogram": novelty_histogram, + "by_type": by_type, + "top_novel": top_novel, + "ideas_per_draft_hist": ideas_per_draft_hist, + "top_idea_drafts": top_idea_drafts, + "cross_tab": cross_tab, + "sources": sources, + "shared_ideas": shared_ideas, + "scatter_data": scatter_data, + "sunburst": { + "labels": sunburst_labels, + "parents": sunburst_parents, + "values": sunburst_values, + }, + } + + + + +def get_source_comparison(db: Database) -> dict: + """Cross-source comparison: ratings, categories, counts by standards body.""" + pairs_all = db.drafts_with_ratings(limit=2000) + # Also include false positives for completeness of source counts + pairs_fp = db.drafts_with_ratings(limit=2000, include_false_positives=True) + + # Build per-source data + source_stats: dict[str, dict] = {} + source_categories: dict[str, Counter] = defaultdict(Counter) + source_ratings: dict[str, dict[str, list]] = defaultdict(lambda: { + "novelty": [], "maturity": [], "overlap": [], "momentum": [], "relevance": [], "scores": [], + }) + # Collect author counts per source + all_authors_by_source: dict[str, set] = defaultdict(set) + + for draft, rating in pairs_all: + src = getattr(draft, "source", "ietf") or "ietf" + source_ratings[src]["novelty"].append(rating.novelty) + source_ratings[src]["maturity"].append(rating.maturity) + source_ratings[src]["overlap"].append(rating.overlap) + source_ratings[src]["momentum"].append(rating.momentum) + source_ratings[src]["relevance"].append(rating.relevance) + source_ratings[src]["scores"].append(round(rating.composite_score, 2)) + for cat in rating.categories: + source_categories[src][cat] += 1 + + # Get all drafts (including unrated) for draft counts + all_drafts = db.list_drafts(limit=5000) + source_draft_counts: Counter = Counter() + for d in all_drafts: + src = getattr(d, "source", "ietf") or "ietf" + source_draft_counts[src] += 1 + + # Author counts by source + try: + rows = db.conn.execute( + """SELECT d.source, COUNT(DISTINCT da.person_id) as author_count + FROM drafts d + JOIN draft_authors da ON d.name = da.draft_name + GROUP BY d.source""" + ).fetchall() + for r in rows: + src = r["source"] or "ietf" + all_authors_by_source[src] = r["author_count"] + except Exception: + pass + + # Idea counts by source + source_idea_counts: Counter = Counter() + try: + rows = db.conn.execute( + """SELECT d.source, COUNT(*) as idea_count + FROM ideas i + JOIN drafts d ON i.draft_name = d.name + GROUP BY d.source""" + ).fetchall() + for r in rows: + src = r["source"] or "ietf" + source_idea_counts[src] = r["idea_count"] + except Exception: + pass + + # Build summary table + all_sources = sorted(set(source_draft_counts.keys()) | set(source_ratings.keys())) + 
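+    # One row per source: draft/rated/author/idea counts, average composite score, and top category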
summary = [] + for src in all_sources: + rats = source_ratings.get(src, {"scores": []}) + cats = source_categories.get(src, Counter()) + top_cat = cats.most_common(1)[0][0] if cats else "N/A" + avg_score = round(sum(rats["scores"]) / len(rats["scores"]), 2) if rats["scores"] else 0.0 + summary.append({ + "source": src, + "drafts": source_draft_counts.get(src, 0), + "rated": len(rats["scores"]), + "authors": all_authors_by_source.get(src, 0), + "ideas": source_idea_counts.get(src, 0), + "avg_score": avg_score, + "top_category": top_cat, + }) + + # Radar data: average of each dimension per source + radar = {} + for src, rats in source_ratings.items(): + if not rats["scores"]: + continue + n = len(rats["scores"]) + radar[src] = { + "novelty": round(sum(rats["novelty"]) / n, 2), + "maturity": round(sum(rats["maturity"]) / n, 2), + "overlap": round(sum(rats["overlap"]) / n, 2), + "momentum": round(sum(rats["momentum"]) / n, 2), + "relevance": round(sum(rats["relevance"]) / n, 2), + "count": n, + } + + # Category distribution by source (for stacked bar / heatmap) + all_cats = sorted({cat for cats in source_categories.values() for cat in cats}) + heatmap = { + "sources": list(source_categories.keys()), + "categories": all_cats, + "values": [], + } + for src in heatmap["sources"]: + row = [source_categories[src].get(cat, 0) for cat in all_cats] + heatmap["values"].append(row) + + # Unique/shared categories analysis + source_cat_sets = {src: set(cats.keys()) for src, cats in source_categories.items()} + unique_cats = {} + for src, cats in source_cat_sets.items(): + others = set() + for s2, c2 in source_cat_sets.items(): + if s2 != src: + others |= c2 + unique_cats[src] = sorted(cats - others) + + shared_cats = set() + for src, cats in source_cat_sets.items(): + for s2, c2 in source_cat_sets.items(): + if s2 != src: + shared_cats |= (cats & c2) + shared_cats = sorted(shared_cats) + + return { + "summary": summary, + "radar": radar, + "heatmap": heatmap, + "unique_categories": unique_cats, + "shared_categories": shared_cats, + } + + +def get_false_positive_profile(db: Database) -> dict: + """Profile drafts flagged as false positives.""" + # Get false positives + fp_rows = db.conn.execute( + """SELECT d.*, r.novelty, r.maturity, r.overlap, r.momentum, r.relevance, + r.summary, r.categories as r_categories, r.false_positive + FROM drafts d + JOIN ratings r ON d.name = r.draft_name + WHERE r.false_positive = 1 + ORDER BY d.name""" + ).fetchall() + + # Get non-FP rated drafts for comparison + nonfp_rows = db.conn.execute( + """SELECT r.novelty, r.maturity, r.overlap, r.momentum, r.relevance, + r.categories as r_categories + FROM ratings r + WHERE COALESCE(r.false_positive, 0) = 0""" + ).fetchall() + + total_rated = db.conn.execute("SELECT COUNT(*) FROM ratings").fetchone()[0] + total_drafts = db.count_drafts(include_false_positives=True) + + # Build FP list + fp_list = [] + fp_categories: Counter = Counter() + fp_sources: Counter = Counter() + fp_dims = {"novelty": [], "maturity": [], "overlap": [], "momentum": [], "relevance": []} + + for row in fp_rows: + cats = json.loads(row["r_categories"]) if row["r_categories"] else [] + src = row["source"] or "ietf" + fp_list.append({ + "name": row["name"], + "title": row["title"], + "source": src, + "categories": cats, + "relevance": row["relevance"], + "novelty": row["novelty"], + "maturity": row["maturity"], + "overlap": row["overlap"], + "momentum": row["momentum"], + "summary": row["summary"] or "", + }) + for cat in cats: + fp_categories[cat] += 1 + 
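+        # Collect per-source counts and per-dimension score lists for the FP-vs-non-FP comparison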
fp_sources[src] += 1
+        fp_dims["novelty"].append(row["novelty"])
+        fp_dims["maturity"].append(row["maturity"])
+        fp_dims["overlap"].append(row["overlap"])
+        fp_dims["momentum"].append(row["momentum"])
+        fp_dims["relevance"].append(row["relevance"])
+
+    # Non-FP dimensions for comparison
+    nonfp_dims = {"novelty": [], "maturity": [], "overlap": [], "momentum": [], "relevance": []}
+    nonfp_categories: Counter = Counter()
+    for row in nonfp_rows:
+        nonfp_dims["novelty"].append(row["novelty"])
+        nonfp_dims["maturity"].append(row["maturity"])
+        nonfp_dims["overlap"].append(row["overlap"])
+        nonfp_dims["momentum"].append(row["momentum"])
+        nonfp_dims["relevance"].append(row["relevance"])
+        cats = json.loads(row["r_categories"]) if row["r_categories"] else []
+        for cat in cats:
+            nonfp_categories[cat] += 1
+
+    # Top terms from FP abstracts
+    stop_words = {
+        "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
+        "of", "with", "by", "from", "is", "it", "that", "this", "are", "was",
+        "be", "as", "can", "may", "will", "not", "has", "have", "been", "which",
+        "their", "its", "also", "such", "these", "would", "should", "could",
+        "more", "other", "than", "into", "about", "between", "over", "after",
+        "all", "one", "two", "new", "they", "we", "our", "each", "some", "any",
+        "there", "what", "when", "how", "where", "who", "does", "do", "did",
+        "no", "if", "so", "up", "out", "only", "used", "using", "use", "based",
+        "through", "both", "well", "within", "must", "while", "had", "were",
+    }
+    word_counter: Counter = Counter()
+    for row in fp_rows:
+        abstract = (row["abstract"] or "").lower()
+        title = (row["title"] or "").lower()
+        text = abstract + " " + title
+        words = re.findall(r'[a-z]{3,}', text)
+        for w in words:
+            if w not in stop_words:
+                word_counter[w] += 1
+    top_terms = word_counter.most_common(30)
+
+    return {
+        "count": len(fp_list),
+        "total_rated": total_rated,
+        "total_drafts": total_drafts,
+        "pct_of_total": round(100 * len(fp_list) / total_drafts, 1) if total_drafts else 0,
+        "pct_of_rated": round(100 * len(fp_list) / total_rated, 1) if total_rated else 0,
+        "fp_list": fp_list,
+        "fp_categories": dict(fp_categories.most_common()),
+        "fp_sources": dict(fp_sources.most_common()),
+        "fp_dims": fp_dims,
+        "nonfp_dims": nonfp_dims,
+        "top_terms": top_terms,
+        "nonfp_categories": dict(nonfp_categories.most_common(20)),
+    }
+
+
+def get_citation_influence(db: Database) -> dict:
+    """Return citation influence analysis data (cached for 5 min)."""
+    return _cached("citation_influence", lambda: _compute_citation_influence(db))
+
+
+def _compute_citation_influence(db: Database) -> dict:
+    """Compute citation influence metrics from the draft_refs table.
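+    Out-degree counts references of every type; the influence score weights RFC references only.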
+ + Returns dict with: + - top_cited_rfcs: top 20 most-cited RFCs with citation counts and citing drafts + - top_citing_drafts: top 20 drafts that cite the most references + - citations_by_category: average citations per category + - stats: total citations, unique RFCs, avg refs per draft + - draft_network: draft-to-draft citation edges for visualization + """ + # Get all references + rows = db.conn.execute( + "SELECT draft_name, ref_type, ref_id FROM draft_refs" + ).fetchall() + + # Get draft titles and categories + draft_rows = db.conn.execute("SELECT name, title FROM drafts").fetchall() + draft_titles = {r["name"]: r["title"] for r in draft_rows} + + rating_rows = db.conn.execute("SELECT draft_name, categories FROM ratings").fetchall() + draft_cats: dict[str, str] = {} + for r in rating_rows: + try: + cats = json.loads(r["categories"]) if r["categories"] else [] + draft_cats[r["draft_name"]] = cats[0] if cats else "Other" + except Exception: + draft_cats[r["draft_name"]] = "Other" + + # Well-known RFC names + rfc_names = { + "2119": "Key words (MUST/SHALL/MAY)", "8174": "Key words update", + "8259": "JSON", "7519": "JWT", "6749": "OAuth 2.0", + "7540": "HTTP/2", "9110": "HTTP Semantics", "7525": "TLS Recommendations", + "8446": "TLS 1.3", "3986": "URIs", "7230": "HTTP/1.1 Syntax", + "7231": "HTTP/1.1 Semantics", "8288": "Web Linking", "6125": "TLS Server Identity", + "7515": "JWS", "7516": "JWE", "7517": "JWK", "7518": "JWA", + "9449": "DPoP", "6750": "OAuth Bearer", "8725": "JWT Best Practices", + "9396": "Rich Authorization Requests", "9101": "JAR", + "8414": "OAuth Server Metadata", "7591": "Dynamic Client Registration", + "8705": "mTLS for OAuth", "9068": "JWT Access Tokens", + "6819": "OAuth Threat Model", "9200": "ACE-OAuth", "9052": "COSE", + "8392": "CWT", "7252": "CoAP", + } + + # In-degree: how many times each RFC is cited + rfc_citations: dict[str, list[str]] = defaultdict(list) + draft_out_count: dict[str, int] = Counter() + draft_to_draft_edges = [] + total_citations = 0 + + for r in rows: + draft_name = r["draft_name"] + ref_type = r["ref_type"] + ref_id = r["ref_id"] + total_citations += 1 + draft_out_count[draft_name] += 1 + + if ref_type == "rfc": + rfc_citations[ref_id].append(draft_name) + elif ref_type == "draft": + draft_to_draft_edges.append({ + "source": draft_name, + "target": ref_id, + "source_title": draft_titles.get(draft_name, draft_name), + "target_title": draft_titles.get(ref_id, ref_id), + }) + + # Top 20 most-cited RFCs + rfc_sorted = sorted(rfc_citations.items(), key=lambda x: len(x[1]), reverse=True) + top_cited_rfcs = [] + for ref_id, citing_drafts in rfc_sorted[:20]: + top_cited_rfcs.append({ + "rfc_id": ref_id, + "name": rfc_names.get(ref_id, ""), + "count": len(citing_drafts), + "drafts": citing_drafts[:10], # Limit to first 10 for display + "total_drafts": len(citing_drafts), + }) + + # Top 20 most-citing drafts (out-degree) + draft_sorted = sorted(draft_out_count.items(), key=lambda x: x[1], reverse=True) + top_citing_drafts = [] + for draft_name, count in draft_sorted[:20]: + top_citing_drafts.append({ + "name": draft_name, + "title": draft_titles.get(draft_name, draft_name), + "count": count, + "category": draft_cats.get(draft_name, "Other"), + }) + + # Citation density by category + cat_totals: dict[str, int] = Counter() + cat_counts: dict[str, int] = Counter() + for draft_name, count in draft_out_count.items(): + cat = draft_cats.get(draft_name, "Other") + cat_totals[cat] += count + cat_counts[cat] += 1 + + citations_by_category = [] + for cat 
in sorted(cat_totals.keys()):
+        avg = cat_totals[cat] / cat_counts[cat] if cat_counts[cat] > 0 else 0
+        citations_by_category.append({
+            "category": cat,
+            "total_citations": cat_totals[cat],
+            "draft_count": cat_counts[cat],
+            "avg_citations": round(avg, 1),
+        })
+    citations_by_category.sort(key=lambda x: x["avg_citations"], reverse=True)
+
+    # PageRank-style influence: drafts that cite highly-cited RFCs
+    # Simple approximation: sum the citation counts of each RFC the draft cites
+    rfc_influence = {rid: len(drafts) for rid, drafts in rfc_citations.items()}
+    draft_pagerank: dict[str, float] = Counter()
+    for r in rows:
+        if r["ref_type"] == "rfc" and r["ref_id"] in rfc_influence:
+            # Higher score for citing highly-cited RFCs
+            draft_pagerank[r["draft_name"]] += rfc_influence[r["ref_id"]]
+
+    pagerank_sorted = sorted(draft_pagerank.items(), key=lambda x: x[1], reverse=True)
+    top_pagerank = []
+    for draft_name, score in pagerank_sorted[:20]:
+        top_pagerank.append({
+            "name": draft_name,
+            "title": draft_titles.get(draft_name, draft_name),
+            "score": round(score, 1),
+            "category": draft_cats.get(draft_name, "Other"),
+            "out_degree": draft_out_count.get(draft_name, 0),
+        })
+
+    # Stats
+    unique_rfcs = len(rfc_citations)
+    drafts_with_refs = len(draft_out_count)
+    avg_refs = total_citations / drafts_with_refs if drafts_with_refs > 0 else 0
+
+    return {
+        "top_cited_rfcs": top_cited_rfcs,
+        "top_citing_drafts": top_citing_drafts,
+        "top_pagerank": top_pagerank,
+        "citations_by_category": citations_by_category,
+        "draft_network": draft_to_draft_edges[:200],  # Limit for perf
+        "stats": {
+            "total_citations": total_citations,
+            "unique_rfcs": unique_rfcs,
+            "drafts_with_refs": drafts_with_refs,
+            "avg_refs_per_draft": round(avg_refs, 1),
+        },
+    }
+
+
+def get_bcp_analysis(db: Database) -> dict:
+    """Return BCP dependency analysis data (cached for 5 min)."""
+    return _cached("bcp_analysis", lambda: _compute_bcp_analysis(db))
+
+
+def _compute_bcp_analysis(db: Database) -> dict:
+    """Compute BCP dependency analysis.
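+    Co-citations are counted once per unordered BCP pair (drafts citing both).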
+ + Returns dict with: + - bcps: all BCPs with citation counts and citing drafts + - co_citation: which BCPs tend to be co-cited + - by_category: BCP citation patterns by category + - coverage: what % of drafts cite at least one BCP + """ + # Get all BCP references + bcp_rows = db.conn.execute( + "SELECT draft_name, ref_id FROM draft_refs WHERE ref_type = 'bcp'" + ).fetchall() + + # Get draft titles and categories + draft_rows = db.conn.execute("SELECT name, title FROM drafts").fetchall() + draft_titles = {r["name"]: r["title"] for r in draft_rows} + total_drafts = len(draft_titles) + + rating_rows = db.conn.execute("SELECT draft_name, categories FROM ratings").fetchall() + draft_cats: dict[str, str] = {} + for r in rating_rows: + try: + cats = json.loads(r["categories"]) if r["categories"] else [] + draft_cats[r["draft_name"]] = cats[0] if cats else "Other" + except Exception: + draft_cats[r["draft_name"]] = "Other" + + # BCP citation counts + bcp_citations: dict[str, list[str]] = defaultdict(list) + draft_bcps: dict[str, list[str]] = defaultdict(list) + + for r in bcp_rows: + bcp_citations[r["ref_id"]].append(r["draft_name"]) + draft_bcps[r["draft_name"]].append(r["ref_id"]) + + # All BCPs with counts + bcps = [] + for bcp_id, citing_drafts in sorted(bcp_citations.items(), + key=lambda x: len(x[1]), reverse=True): + bcps.append({ + "bcp_id": bcp_id, + "count": len(citing_drafts), + "drafts": citing_drafts[:10], + "total_drafts": len(citing_drafts), + }) + + # Co-citation matrix: which BCPs appear together in the same draft + bcp_ids = sorted(bcp_citations.keys()) + co_citation = [] + for i, bcp_a in enumerate(bcp_ids): + drafts_a = set(bcp_citations[bcp_a]) + for j, bcp_b in enumerate(bcp_ids): + if j <= i: + continue + drafts_b = set(bcp_citations[bcp_b]) + shared = len(drafts_a & drafts_b) + if shared > 0: + co_citation.append({ + "bcp_a": bcp_a, + "bcp_b": bcp_b, + "count": shared, + }) + + # Heatmap data: full matrix for all BCPs (top 20 by citation count) + top_bcp_ids = [b["bcp_id"] for b in bcps[:20]] + heatmap_matrix = [] + for bcp_a in top_bcp_ids: + row = [] + drafts_a = set(bcp_citations.get(bcp_a, [])) + for bcp_b in top_bcp_ids: + drafts_b = set(bcp_citations.get(bcp_b, [])) + shared = len(drafts_a & drafts_b) + row.append(shared) + heatmap_matrix.append(row) + + # BCP citations by category + cat_bcp_count: dict[str, Counter] = defaultdict(Counter) + for draft_name, bcp_list in draft_bcps.items(): + cat = draft_cats.get(draft_name, "Other") + for bcp_id in bcp_list: + cat_bcp_count[cat][bcp_id] += 1 + + by_category = [] + for cat in sorted(cat_bcp_count.keys()): + top_bcps = cat_bcp_count[cat].most_common(5) + by_category.append({ + "category": cat, + "total_bcp_refs": sum(cat_bcp_count[cat].values()), + "unique_bcps": len(cat_bcp_count[cat]), + "top_bcps": [{"bcp_id": bid, "count": c} for bid, c in top_bcps], + }) + by_category.sort(key=lambda x: x["total_bcp_refs"], reverse=True) + + # Coverage + drafts_with_bcp = len(draft_bcps) + coverage_pct = (drafts_with_bcp / total_drafts * 100) if total_drafts > 0 else 0 + + return { + "bcps": bcps, + "co_citation": co_citation, + "heatmap_labels": top_bcp_ids, + "heatmap_matrix": heatmap_matrix, + "by_category": by_category, + "coverage": { + "total_drafts": total_drafts, + "drafts_with_bcp": drafts_with_bcp, + "coverage_pct": round(coverage_pct, 1), + "unique_bcps": len(bcp_citations), + "total_bcp_refs": len(bcp_rows), + }, + } + + +def global_search(db: Database, query: str) -> SearchResults: + """Search across drafts (FTS5), 
+
+
+def global_search(db: Database, query: str) -> SearchResults:
+    """Search across drafts (FTS5), ideas, authors, and gaps.
+
+    Returns {drafts: [...], ideas: [...], authors: [...], gaps: [...]}.
+    """
+    results: dict = {"drafts": [], "ideas": [], "authors": [], "gaps": []}
+    if not query or not query.strip():
+        return results
+
+    q = query.strip()
+
+    # 1. Drafts via FTS5
+    try:
+        fts_query = re.sub(r'[^\w\s]', '', q)
+        fts_query = re.sub(r'\b(NEAR|OR|AND|NOT)\b', '', fts_query, flags=re.IGNORECASE)
+        fts_query = re.sub(r'\s+', ' ', fts_query).strip()
+        if not fts_query:
+            raise ValueError("empty query after sanitization")
+        rows = db.conn.execute(
+            """SELECT d.name, d.title, d.abstract, d.time, d."group"
+               FROM drafts d
+               JOIN drafts_fts f ON d.rowid = f.rowid
+               WHERE drafts_fts MATCH ?
+               ORDER BY rank
+               LIMIT 50""",
+            (fts_query,),
+        ).fetchall()
+        for r in rows:
+            results["drafts"].append({
+                "name": r["name"],
+                "title": r["title"],
+                "abstract": (r["abstract"] or "")[:200],
+                "date": r["time"],
+                "group": r["group"] or "individual",
+            })
+    except Exception:
+        # FTS5 MATCH can fail on certain query syntax; fall back to LIKE
+        like = f"%{q}%"
+        rows = db.conn.execute(
+            """SELECT name, title, abstract, time, "group" FROM drafts
+               WHERE title LIKE ? OR name LIKE ? OR abstract LIKE ?
+               LIMIT 50""",
+            (like, like, like),
+        ).fetchall()
+        for r in rows:
+            results["drafts"].append({
+                "name": r["name"],
+                "title": r["title"],
+                "abstract": (r["abstract"] or "")[:200],
+                "date": r["time"],
+                "group": r["group"] or "individual",
+            })
+
+    # 2. Ideas via LIKE
+    like = f"%{q}%"
+    rows = db.conn.execute(
+        """SELECT id, title, description, idea_type, draft_name FROM ideas
+           WHERE title LIKE ? OR description LIKE ?
+           ORDER BY id LIMIT 50""",
+        (like, like),
+    ).fetchall()
+    for r in rows:
+        results["ideas"].append({
+            "id": r["id"],
+            "title": r["title"],
+            "description": (r["description"] or "")[:200],
+            "type": r["idea_type"],
+            "draft_name": r["draft_name"],
+        })
+
+    # 3. Authors via LIKE
+    rows = db.conn.execute(
+        """SELECT person_id, name, affiliation FROM authors
+           WHERE name LIKE ? OR affiliation LIKE ?
+           ORDER BY name LIMIT 50""",
+        (like, like),
+    ).fetchall()
+    for r in rows:
+        results["authors"].append({
+            "person_id": r["person_id"],
+            "name": r["name"],
+            "affiliation": r["affiliation"] or "",
+        })
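The sanitization in step 1 strips FTS5 operators and keywords so arbitrary user input cannot trigger a MATCH syntax error, raising instead when nothing survives so the LIKE fallback kicks in. A quick standalone check of those three regexes (the inputs are hypothetical):

```python
import re

def sanitize(q: str) -> str:
    q = re.sub(r'[^\w\s]', '', q)            # drop operators like " * ( )
    q = re.sub(r'\b(NEAR|OR|AND|NOT)\b', '',  # drop FTS5 keywords
               q, flags=re.IGNORECASE)
    return re.sub(r'\s+', ' ', q).strip()     # collapse leftover whitespace

print(sanitize('tls NEAR "handshake*" OR x'))  # -> 'tls handshake x'
print(sanitize('"("'))                         # -> '' (falls back to LIKE)
```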
+
+    # 4. Gaps via LIKE
+    rows = db.conn.execute(
+        """SELECT id, topic, description, category, severity FROM gaps
+           WHERE topic LIKE ? OR description LIKE ?
+           ORDER BY id LIMIT 50""",
+        (like, like),
+    ).fetchall()
+    for r in rows:
+        results["gaps"].append({
+            "id": r["id"],
+            "topic": r["topic"],
+            "description": (r["description"] or "")[:200],
+            "category": r["category"],
+            "severity": r["severity"],
+        })
+
+    return results
+
+
+def get_landscape_tsne(db: Database) -> list[dict]:
+    """Return the t-SNE landscape projection (cached for 5 min)."""
+    return _cached("landscape_tsne", lambda: _compute_landscape_tsne(db))
+
+
+def _compute_landscape_tsne(db: Database) -> list[dict]:
+    """Compute t-SNE from embeddings, return [{name, title, x, y, category, score}]."""
+    embeddings = db.all_embeddings()
+    if len(embeddings) < 5:
+        return []
+
+    pairs = db.drafts_with_ratings(limit=1000)
+    rating_map = {d.name: r for d, r in pairs}
+    draft_map = {d.name: d for d, _ in pairs}
+
+    # Filter to drafts that have both embeddings and ratings
+    names = [n for n in embeddings if n in rating_map]
+    if len(names) < 5:
+        return []
+
+    matrix = np.array([embeddings[n] for n in names])
+
+    try:
+        tsne = TSNE(n_components=2, perplexity=min(30, len(names) - 1),
+                    random_state=42, max_iter=500)
+        coords = tsne.fit_transform(matrix)
+    except Exception:
+        return []
+
+    result = []
+    for i, name in enumerate(names):
+        r = rating_map[name]
+        d = draft_map.get(name)
+        result.append({
+            "name": name,
+            "title": d.title if d else name,
+            "x": round(float(coords[i, 0]), 3),
+            "y": round(float(coords[i, 1]), 3),
+            "category": r.categories[0] if r.categories else "Other",
+            "score": round(r.composite_score, 2),
+        })
+    return result
+
+
+def get_comparison_data(db: Database, names: list[str]) -> dict | None:
+    """Get comparison data for a list of drafts.
+
+    Returns {
+        drafts: [{name, title, abstract, rating, ideas, refs, ...}],
+        shared_ideas: [{title, drafts: [name,...]}],
+        unique_ideas: {name: [{title, description}]},
+        shared_refs: [{type, id, drafts: [name,...]}],
+        unique_refs: {name: [{type, id}]},
+        similarities: [{a, b, similarity}],
+        comparison_text: str | None,
+    }
+    """
+    drafts_data = []
+    all_ideas: dict[str, list[dict]] = {}
+    all_refs: dict[str, list[tuple[str, str]]] = {}
+
+    for name in names:
+        detail = get_draft_detail(db, name)
+        if not detail:
+            continue
+        drafts_data.append(detail)
+        all_ideas[name] = detail.get("ideas", [])
+        all_refs[name] = [(r["type"], r["id"]) for r in detail.get("refs", [])]
+
+    if len(drafts_data) < 2:
+        return None
+
+    # Find shared vs unique ideas (grouped by exact lowercased title)
+    idea_title_drafts: dict[str, list[str]] = {}
+    for name, ideas in all_ideas.items():
+        for idea in ideas:
+            title_lower = idea["title"].lower().strip()
+            if title_lower not in idea_title_drafts:
+                idea_title_drafts[title_lower] = []
+            idea_title_drafts[title_lower].append(name)
+
+    shared_ideas = [
+        {"title": title, "drafts": draft_list}
+        for title, draft_list in idea_title_drafts.items()
+        if len(set(draft_list)) > 1
+    ]
+    unique_ideas: dict[str, list[dict]] = {}
+    for name, ideas in all_ideas.items():
+        unique = []
+        for idea in ideas:
+            title_lower = idea["title"].lower().strip()
+            if len(set(idea_title_drafts.get(title_lower, []))) <= 1:
+                unique.append({"title": idea["title"], "description": idea.get("description", "")})
+        unique_ideas[name] = unique
+
+    # Find shared vs unique references
+    ref_drafts: dict[tuple[str, str], list[str]] = {}
+    for name, refs in all_refs.items():
+        for ref in refs:
+            if ref not in ref_drafts:
+                ref_drafts[ref] = []
+            ref_drafts[ref].append(name)
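The `min(30, len(names) - 1)` guard exists because scikit-learn requires `perplexity < n_samples`. A standalone sketch of the projection step, substituting random vectors for real embeddings (assumes scikit-learn >= 1.5, where the parameter is named `max_iter`; the 384-dim width is only an example):

```python
import numpy as np
from sklearn.manifold import TSNE

rng = np.random.default_rng(42)
names = [f"draft-{i}" for i in range(12)]
matrix = rng.normal(size=(len(names), 384))  # stand-in for stored embeddings

# perplexity must stay below n_samples, hence the min(30, n - 1) guard
tsne = TSNE(n_components=2, perplexity=min(30, len(names) - 1),
            random_state=42, max_iter=500)
coords = tsne.fit_transform(matrix)
print(coords.shape)  # (12, 2)
```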
+    shared_refs = [
+        {"type": ref[0], "id": ref[1], "drafts": draft_list}
+        for ref, draft_list in ref_drafts.items()
+        if len(set(draft_list)) > 1
+    ]
+    unique_refs: dict[str, list[dict]] = {}
+    for name, refs in all_refs.items():
+        unique = []
+        for ref in refs:
+            if len(set(ref_drafts.get(ref, []))) <= 1:
+                unique.append({"type": ref[0], "id": ref[1]})
+        unique_refs[name] = unique
+
+    # Pairwise cosine similarity between draft embeddings
+    embeddings = db.all_embeddings()
+    similarities = []
+    valid_names = [d["name"] for d in drafts_data]
+    for i in range(len(valid_names)):
+        for j in range(i + 1, len(valid_names)):
+            a, b = valid_names[i], valid_names[j]
+            if a in embeddings and b in embeddings:
+                vec_a = embeddings[a]
+                vec_b = embeddings[b]
+                dot = np.dot(vec_a, vec_b)
+                norm = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
+                sim = float(dot / norm) if norm > 0 else 0.0
+                similarities.append({"a": a, "b": b, "similarity": round(sim, 4)})
+
+    return {
+        "drafts": drafts_data,
+        "shared_ideas": shared_ideas,
+        "unique_ideas": unique_ideas,
+        "shared_refs": shared_refs,
+        "unique_refs": unique_refs,
+        "similarities": similarities,
+        "comparison_text": None,
+    }
+
+
+def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict:
+    """Search-only (free): returns sources plus a cached answer if available."""
+    config = Config.load()
+    searcher = HybridSearch(config, db)
+    return searcher.search_only(question, top_k=top_k)
+
+
+def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict:
+    """Run Claude synthesis (costs tokens; the result is cached permanently)."""
+    config = Config.load()
+    searcher = HybridSearch(config, db)
+    return searcher.ask(question, top_k=top_k, cheap=cheap)
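The similarity values are plain cosine similarity, with the zero-norm guard returning 0.0. A tiny sanity check of the same arithmetic with made-up 3-dimensional vectors in place of real embeddings:

```python
import numpy as np

# Hypothetical embeddings; real vectors would come from db.all_embeddings()
embeddings = {
    "draft-a": np.array([1.0, 0.0, 1.0]),
    "draft-b": np.array([1.0, 1.0, 0.0]),
}

vec_a, vec_b = embeddings["draft-a"], embeddings["draft-b"]
norm = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
sim = float(np.dot(vec_a, vec_b) / norm) if norm > 0 else 0.0
print(round(sim, 4))  # 0.5, the cosine of the 60-degree angle between them
```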