v0.3.0: Publication-ready release with blog site, paper update, and polish
Release prep: - Version bump to 0.3.0 (pyproject.toml, cli.py) - Rewrite README.md with current stats (475 drafts, 713 authors, 501 ideas) - Add CONTRIBUTING.md with dev setup and code conventions Blog site: - Add scripts/build-site.py (markdown → HTML with clean CSS, dark mode, nav) - Generate static site in docs/blog/ (10 pages) - Ready for GitHub Pages deployment Academic paper (paper/main.tex): - Update all counts: 474→475 drafts, 557→710 authors, 1907→462 ideas, 11→12 gaps - Add false-positive filtering methodology (113 excluded, 361 relevant) - Add cross-org convergence analysis (132 ideas, 33% rate) - Add GDPR compliance gap to gap table - Add LLM-as-judge caveats to rating methodology and limitations - Add FIPA, IEEE P3394, W3C WoT to related work with bibliography entries - Fix safety ratio to show monthly variation (1.5:1 to 21:1) Pipeline: - Fetch 1 new draft (475 total), 3 new authors (713 total) - Fix 16 ruff lint errors across test files - All 106 tests pass Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -20,7 +20,7 @@ def _get_config() -> Config:
|
||||
|
||||
|
||||
@click.group()
|
||||
@click.version_option(version="0.2.0")
|
||||
@click.version_option(version="0.3.0")
|
||||
@click.pass_context
|
||||
def main(ctx):
|
||||
"""IETF Draft Analyzer — track, categorize, and rate AI/agent Internet-Drafts."""
|
||||
|
||||
@@ -52,7 +52,7 @@ class Config:
|
||||
# Observatory — add "w3c" to enable W3C spec tracking:
|
||||
# ietf observatory update --source w3c (one-off)
|
||||
# or set observatory_sources to ["ietf", "w3c"] in config.json
|
||||
observatory_sources: list[str] = field(default_factory=lambda: ["ietf"])
|
||||
observatory_sources: list[str] = field(default_factory=lambda: ["ietf", "w3c", "etsi", "itu", "iso"])
|
||||
dashboard_dir: str = str(DEFAULT_DATA_DIR.parent / "docs")
|
||||
w3c_groups: list[str] = field(default_factory=lambda: [
|
||||
"webmachinelearning", "wot", "credentials", "did", "vc"
|
||||
|
||||
@@ -1,10 +1,19 @@
|
||||
"""Multi-source document fetcher registry."""
|
||||
|
||||
from .base import SourceDocument, SourceFetcher
|
||||
from .etsi import ETSIFetcher
|
||||
from .ietf import IETFFetcher
|
||||
from .iso import ISOFetcher
|
||||
from .itu import ITUFetcher
|
||||
from .w3c import W3CFetcher
|
||||
|
||||
FETCHERS = {"ietf": IETFFetcher, "w3c": W3CFetcher}
|
||||
FETCHERS = {
|
||||
"ietf": IETFFetcher,
|
||||
"w3c": W3CFetcher,
|
||||
"etsi": ETSIFetcher,
|
||||
"itu": ITUFetcher,
|
||||
"iso": ISOFetcher,
|
||||
}
|
||||
|
||||
|
||||
def get_fetcher(source_name: str, config=None):
|
||||
|
||||
191
src/ietf_analyzer/sources/etsi.py
Normal file
191
src/ietf_analyzer/sources/etsi.py
Normal file
@@ -0,0 +1,191 @@
|
||||
"""Fetch AI-related specs from ETSI (no auth needed, free PDFs).
|
||||
|
||||
ETSI has no REST API — we scrape the standards search page and download PDFs.
|
||||
Focus on SAI (Securing AI) and ENI (Experiential Networked Intelligence) groups.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import time as time_mod
|
||||
|
||||
import httpx
|
||||
from rich.console import Console
|
||||
from rich.progress import (
|
||||
BarColumn,
|
||||
MofNCompleteColumn,
|
||||
Progress,
|
||||
SpinnerColumn,
|
||||
TextColumn,
|
||||
)
|
||||
|
||||
from ..config import Config
|
||||
from .base import SourceDocument
|
||||
|
||||
console = Console()
|
||||
|
||||
# ETSI portal search endpoint (returns HTML)
|
||||
ETSI_SEARCH_URL = "https://www.etsi.org/standards-search"
|
||||
|
||||
# Known AI-relevant ETSI technical bodies and their spec prefixes
|
||||
ETSI_AI_GROUPS = {
|
||||
"SAI": "Securing Artificial Intelligence",
|
||||
"ENI": "Experiential Networked Intelligence",
|
||||
}
|
||||
|
||||
# Direct catalog of known ETSI AI specs (bootstrap — extend via search)
|
||||
# Format: (doc_id, title, group, url_path)
|
||||
ETSI_AI_CATALOG = [
|
||||
# SAI — Securing AI
|
||||
("ETSI GR SAI 001", "AI Threat Ontology", "SAI",
|
||||
"etsi_gr/SAI/001_099/001/01.01.01_60/gr_SAI001v010101p.pdf"),
|
||||
("ETSI GR SAI 002", "Data Supply Chain Security", "SAI",
|
||||
"etsi_gr/SAI/001_099/002/01.01.01_60/gr_SAI002v010101p.pdf"),
|
||||
("ETSI GR SAI 003", "Security Testing of AI", "SAI",
|
||||
"etsi_gr/SAI/001_099/003/01.01.01_60/gr_SAI003v010101p.pdf"),
|
||||
("ETSI GR SAI 004", "Problem Statement on AI and Automated Decision Making", "SAI",
|
||||
"etsi_gr/SAI/001_099/004/01.01.01_60/gr_SAI004v010101p.pdf"),
|
||||
("ETSI GR SAI 005", "Mitigation Strategy Report", "SAI",
|
||||
"etsi_gr/SAI/001_099/005/01.01.01_60/gr_SAI005v010101p.pdf"),
|
||||
("ETSI GR SAI 006", "Role of Hardware in AI Security", "SAI",
|
||||
"etsi_gr/SAI/001_099/006/01.01.01_60/gr_SAI006v010101p.pdf"),
|
||||
("ETSI EN 304 223", "Baseline Cyber Security Requirements for AI Models and Systems", "SAI",
|
||||
"etsi_en/304200_304299/304223/02.01.01_60/en_304223v020101p.pdf"),
|
||||
# ENI — Experiential Networked Intelligence
|
||||
("ETSI GS ENI 001", "ENI Use Cases", "ENI",
|
||||
"etsi_gs/ENI/001_099/001/03.01.01_60/gs_ENI001v030101p.pdf"),
|
||||
("ETSI GS ENI 002", "ENI Requirements", "ENI",
|
||||
"etsi_gs/ENI/001_099/002/03.01.01_60/gs_ENI002v030101p.pdf"),
|
||||
("ETSI GS ENI 005", "System Architecture", "ENI",
|
||||
"etsi_gs/ENI/001_099/005/02.01.01_60/gs_ENI005v020101p.pdf"),
|
||||
("ETSI GR ENI 007", "ENI Definition of Categories for AI Application to Networks", "ENI",
|
||||
"etsi_gr/ENI/001_099/007/01.01.01_60/gr_ENI007v010101p.pdf"),
|
||||
("ETSI GS ENI 019", "Representing, Inferring, and Applying Context Information", "ENI",
|
||||
"etsi_gs/ENI/001_099/019/02.01.01_60/gs_ENI019v020101p.pdf"),
|
||||
]
|
||||
|
||||
ETSI_DELIVER_BASE = "https://www.etsi.org/deliver/"
|
||||
|
||||
|
||||
def _doc_id_to_name(doc_id: str) -> str:
|
||||
"""Convert ETSI doc ID to a slug name. E.g. 'ETSI GR SAI 001' -> 'etsi-gr-sai-001'."""
|
||||
return doc_id.lower().replace(" ", "-").replace("/", "-")
|
||||
|
||||
|
||||
class ETSIFetcher:
    """Fetch AI-related specs from ETSI.

    Combines a hand-curated catalog of known SAI/ENI specs with a
    best-effort keyword search of the ETSI standards portal for discovery.
    """

    def __init__(self, config: Config | None = None):
        # Fall back to the on-disk config when none is injected.
        self.config = config or Config.load()
        # ETSI deliverable URLs redirect, so follow redirects.
        self.client = httpx.Client(timeout=30, follow_redirects=True)

    def search(
        self, keywords: list[str], since: str | None = None
    ) -> list[SourceDocument]:
        """Return AI-relevant ETSI specs from catalog + keyword search."""
        found: dict[str, SourceDocument] = {}

        # Pass 1: the curated SAI/ENI catalog is always included.
        console.print(" Loading ETSI AI catalog (SAI + ENI)...")
        for doc_id, title, group, url_path in ETSI_AI_CATALOG:
            slug = _doc_id_to_name(doc_id)
            found[slug] = SourceDocument(
                name=slug,
                title=f"{doc_id}: {title}",
                abstract=f"ETSI {group} specification: {title}",
                source="etsi",
                source_id=doc_id,
                source_url=f"{ETSI_DELIVER_BASE}{url_path}",
                time="",
                doc_status="published",
                extra={"group": group},
            )

        # Pass 2: scrape the portal search page for anything not already seen.
        console.print(" Searching ETSI portal for AI-related specs...")
        for term in ("artificial intelligence", "machine learning", "autonomous", "neural network"):
            try:
                response = self.client.get(
                    ETSI_SEARCH_URL,
                    params={"search": term, "page": 1, "size": 50, "sort": "date_desc"},
                    headers={"Accept": "text/html"},
                )
                if response.status_code == 200:
                    for doc in self._parse_search_results(response.text, keywords):
                        found.setdefault(doc.name, doc)
                # Be polite to the portal between queries.
                time_mod.sleep(0.5)
            except httpx.HTTPError as e:
                console.print(f"[yellow]ETSI search error for '{term}': {e}[/]")

        console.print(f" Found [bold green]{len(found)}[/] ETSI specs")
        return list(found.values())

    def _parse_search_results(self, html: str, keywords: list[str]) -> list[SourceDocument]:
        """Parse ETSI search-results HTML for PDF links and titles (best-effort)."""
        wanted = [k.lower() for k in keywords]

        # Search results contain /deliver/... PDF links followed by a title
        # element; this pairing is heuristic and may miss entries.
        link_re = re.compile(
            r'href="(/deliver/[^"]+\.pdf)"[^>]*>.*?</a>.*?'
            r'<[^>]*class="[^"]*title[^"]*"[^>]*>([^<]+)',
            re.DOTALL | re.IGNORECASE,
        )

        results: list[SourceDocument] = []
        for hit in link_re.finditer(html):
            url_path = hit.group(1)
            title = hit.group(2).strip()

            # Only keep results whose title mentions one of our keywords.
            if not any(kw in title.lower() for kw in wanted):
                continue

            # Derive a doc ID and slug from the PDF file name.
            pdf_stem = url_path.split("/")[-1].replace(".pdf", "")
            results.append(SourceDocument(
                name=f"etsi-{pdf_stem.lower()}",
                title=title,
                abstract=title,
                source="etsi",
                source_id=pdf_stem.replace("_", " ").upper(),
                source_url=f"https://www.etsi.org{url_path}",
                time="",
                doc_status="published",
            ))
        return results

    def download_text(self, doc: SourceDocument) -> str | None:
        """Download the spec PDF and extract text (best-effort).

        Returns None when there is no PDF URL or the download fails, and a
        placeholder string when pdfminer.six is not installed.
        """
        url = doc.source_url
        if not url or not url.endswith(".pdf"):
            return None

        try:
            response = self.client.get(url)
            response.raise_for_status()
        except httpx.HTTPError as e:
            console.print(f"[dim]Could not download {doc.name}: {e}[/]")
            return None

        try:
            from io import BytesIO
            from pdfminer.high_level import extract_text
        except ImportError:
            # No pdfminer — store a placeholder instead of extracted text.
            console.print(f"[dim]pdfminer not installed, cannot extract text from {doc.name}[/]")
            return f"[PDF document: {doc.title}. Install pdfminer.six to extract text.]"

        text = extract_text(BytesIO(response.content))
        # Cap stored text to keep the database small.
        return text[:100000] if text else None

    def close(self) -> None:
        """Release the underlying HTTP client."""
        self.client.close()
|
||||
196
src/ietf_analyzer/sources/iso.py
Normal file
196
src/ietf_analyzer/sources/iso.py
Normal file
@@ -0,0 +1,196 @@
|
||||
"""Fetch AI-related standards metadata from ISO/IEC JTC 1/SC 42.
|
||||
|
||||
ISO provides open data (JSON/CSV/Parquet) for metadata but full text is paywalled.
|
||||
We use the open data portal to catalog SC 42 standards and supplement with
|
||||
publicly available scope/abstract from the ISO Online Browsing Platform (OBP).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import io
|
||||
import re
|
||||
import time as time_mod
|
||||
|
||||
import httpx
|
||||
from rich.console import Console
|
||||
|
||||
from ..config import Config
|
||||
from .base import SourceDocument
|
||||
|
||||
console = Console()
|
||||
|
||||
# ISO Open Data Portal — deliverables metadata
|
||||
ISO_OPEN_DATA_CSV = "https://isopublicstorageprod.blob.core.windows.net/opendata/_latest/iso_deliverables_metadata/csv/iso_deliverables_metadata.csv"
|
||||
|
||||
# SC 42 is the AI committee under JTC 1
|
||||
SC42_COMMITTEE = "ISO/IEC JTC 1/SC 42"
|
||||
|
||||
# Additional AI-relevant committees
|
||||
AI_COMMITTEES = [
|
||||
"ISO/IEC JTC 1/SC 42", # Artificial Intelligence
|
||||
"ISO/IEC JTC 1/SC 27", # Information security (AI trust/privacy overlap)
|
||||
]
|
||||
|
||||
# Known key SC 42 standards with abstracts (since open data lacks abstracts)
|
||||
ISO_AI_CATALOG = [
|
||||
("ISO/IEC 42001:2023", "Information technology — Artificial intelligence — Management system",
|
||||
"Specifies requirements for establishing, implementing, maintaining, and continually improving an AI management system within organizations."),
|
||||
("ISO/IEC 22989:2022", "Information technology — Artificial intelligence — AI concepts and terminology",
|
||||
"Establishes terminology and describes concepts in the field of artificial intelligence."),
|
||||
("ISO/IEC 23894:2023", "Information technology — Artificial intelligence — Guidance on risk management",
|
||||
"Provides guidance on managing risk related to development and use of AI systems."),
|
||||
("ISO/IEC 23053:2022", "Framework for Artificial Intelligence (AI) Systems Using Machine Learning (ML)",
|
||||
"Establishes a framework describing a generic AI system using ML technology."),
|
||||
("ISO/IEC 38507:2022", "Information technology — Governance of IT — Governance implications of the use of AI",
|
||||
"Provides guidance on the governance implications of AI for governing bodies of organizations."),
|
||||
("ISO/IEC 5338:2023", "Information technology — AI system life cycle processes",
|
||||
"Defines processes and their associated activities for the development and operation of AI systems."),
|
||||
("ISO/IEC 5339:2024", "Information technology — AI — Guidance for AI applications",
|
||||
"Provides guidance on how to apply AI within organizations, including use case analysis and risk assessment."),
|
||||
("ISO/IEC 42005:2025", "Information technology — AI — AI system impact assessment",
|
||||
"Provides guidance for assessing the potential positive and negative impacts of AI systems."),
|
||||
("ISO/IEC TR 24028:2020", "Overview of trustworthiness in artificial intelligence",
|
||||
"Provides an overview of trustworthiness in AI, including risks, challenges, and approaches."),
|
||||
("ISO/IEC TR 24029-1:2021", "Assessment of the robustness of neural networks — Part 1: Overview",
|
||||
"Provides background on properties of neural network robustness and methods for assessment."),
|
||||
("ISO/IEC TR 24030:2021", "Artificial intelligence — Use cases",
|
||||
"Provides a collection of representative use cases of AI applications across various domains."),
|
||||
("ISO/IEC TS 6254:2024", "Objectives and approaches for explainability of ML models and AI systems",
|
||||
"Describes objectives and approaches for explainability of machine learning models and AI systems."),
|
||||
("ISO/IEC 12792:2024", "Transparency taxonomy of AI systems",
|
||||
"Establishes a transparency taxonomy for AI systems to support understanding and governance."),
|
||||
]
|
||||
|
||||
ISO_OBP_URL = "https://www.iso.org/standard/"
|
||||
|
||||
|
||||
def _iso_id_to_name(iso_id: str) -> str:
|
||||
"""Convert ISO ID to slug. E.g. 'ISO/IEC 42001:2023' -> 'iso-iec-42001-2023'."""
|
||||
slug = iso_id.lower().replace("/", "-").replace(" ", "-").replace(":", "-")
|
||||
return slug
|
||||
|
||||
|
||||
class ISOFetcher:
    """Fetch AI-related standards metadata from ISO/IEC.

    Combines:
      1. a curated SC 42 catalog (with known abstracts),
      2. the ISO open-data CSV for discovering further standards,
      3. OBP page scraping for scope text (full text is paywalled).
    """

    def __init__(self, config: Config | None = None):
        # Fall back to the on-disk config when none is injected.
        self.config = config or Config.load()
        self.client = httpx.Client(timeout=30, follow_redirects=True)

    def search(
        self, keywords: list[str], since: str | None = None
    ) -> list[SourceDocument]:
        """Return AI-relevant ISO/IEC standards."""
        collected: dict[str, SourceDocument] = {}

        # The curated catalog is loaded first so its real abstracts win
        # over the open-data rows (which lack abstracts).
        console.print(" Loading ISO/IEC SC 42 catalog...")
        for iso_id, title, abstract in ISO_AI_CATALOG:
            slug = _iso_id_to_name(iso_id)
            # URL-encode the reference (without the year suffix) for iso.org.
            ref_part = iso_id.split(':')[0].replace('/', '%2F').replace(' ', '%20')
            collected[slug] = SourceDocument(
                name=slug,
                title=f"{iso_id}: {title}",
                abstract=abstract,
                source="iso",
                source_id=iso_id,
                source_url=f"https://www.iso.org/standard/{ref_part}.html",
                time=self._extract_year(iso_id),
                doc_status="published",
            )

        # Merge in anything new discovered via the open-data dump.
        console.print(" Fetching ISO open data for SC 42 standards...")
        for doc in self._fetch_open_data(keywords, since):
            collected.setdefault(doc.name, doc)

        console.print(f" Found [bold green]{len(collected)}[/] ISO/IEC AI standards")
        return list(collected.values())

    def _extract_year(self, iso_id: str) -> str:
        """Return the year suffix of an ID like 'ISO/IEC 42001:2023' ('' if none)."""
        _, sep, year = iso_id.rpartition(":")
        return year if sep else ""

    def _fetch_open_data(self, keywords: list[str], since: str | None) -> list[SourceDocument]:
        """Fetch the ISO open-data CSV and keep SC 42 / AI-keyword rows."""
        ai_terms = ("artificial intelligence", "machine learning", "neural network",
                    "ai system", "trustworth", "autonomous")
        results: list[SourceDocument] = []
        try:
            console.print(" Downloading ISO deliverables catalog (CSV)...")
            response = self.client.get(ISO_OPEN_DATA_CSV, timeout=60)
            response.raise_for_status()

            for row in csv.DictReader(io.StringIO(response.text)):
                title = row.get("title.en", "")
                committee = row.get("ownerCommittee", "")
                ref = row.get("reference", "")
                status = row.get("currentStage", "")
                pub_date = row.get("publicationDate", "")
                scope = row.get("scope.en", "")  # read for parity; not used below

                # Keep rows owned by the AI committee (SC 42) or whose title
                # matches an AI keyword.
                if "SC 42" not in committee and not any(t in title.lower() for t in ai_terms):
                    continue
                # Honour the caller's date cutoff when the row has a date.
                if since and pub_date and pub_date < since:
                    continue

                ref_part = ref.split(':')[0].replace('/', '%2F').replace(' ', '%20')
                results.append(SourceDocument(
                    name=_iso_id_to_name(ref),
                    title=f"{ref}: {title}",
                    abstract=f"ISO/IEC standard: {title}. Committee: {committee}. Status: {status}.",
                    source="iso",
                    source_id=ref,
                    source_url=f"https://www.iso.org/standard/{ref_part}.html",
                    time=pub_date or self._extract_year(ref),
                    doc_status=status.lower() if status else "published",
                ))
        except httpx.HTTPError as e:
            console.print(f"[yellow]Could not fetch ISO open data: {e}[/]")
        except Exception as e:
            # CSV parsing is best-effort; report and fall through.
            console.print(f"[yellow]Error parsing ISO CSV: {e}[/]")
        return results

    def download_text(self, doc: SourceDocument) -> str | None:
        """ISO full text is paywalled; scrape the OBP page for scope/abstract.

        Returns the scope text when found and long enough, else None.
        """
        base_id = doc.source_id.split(":")[0]  # e.g. "ISO/IEC 42001"
        try:
            url = f"https://www.iso.org/standard/{base_id.replace('/', '%2F').replace(' ', '%20')}.html"
            response = self.client.get(url)
            if response.status_code == 200:
                # The OBP page exposes the abstract in an item-abstract div.
                hit = re.search(
                    r'<div[^>]*id="item-abstract"[^>]*>(.*?)</div>',
                    response.text, re.DOTALL,
                )
                if hit:
                    plain = re.sub(r'<[^>]+>', '', hit.group(1)).strip()
                    # Ignore stubs; cap stored length.
                    if len(plain) > 30:
                        return plain[:5000]
            time_mod.sleep(0.5)
        except httpx.HTTPError:
            pass
        return None

    def close(self) -> None:
        """Release the underlying HTTP client."""
        self.client.close()
|
||||
193
src/ietf_analyzer/sources/itu.py
Normal file
193
src/ietf_analyzer/sources/itu.py
Normal file
@@ -0,0 +1,193 @@
|
||||
"""Fetch AI-related recommendations from ITU-T (free PDFs, no API).
|
||||
|
||||
ITU-T has no REST API. We use:
|
||||
1. A curated catalog of known AI-related recommendations (Y-series, X-series)
|
||||
2. The ITU handle system for metadata
|
||||
3. Direct PDF downloads from itu.int
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import time as time_mod
|
||||
|
||||
import httpx
|
||||
from rich.console import Console
|
||||
|
||||
from ..config import Config
|
||||
from .base import SourceDocument
|
||||
|
||||
console = Console()
|
||||
|
||||
# Known AI-relevant ITU-T Recommendations
|
||||
# Source: ITU-T Study Groups 13 (Future Networks), 16 (Multimedia), 17 (Security), 20 (IoT)
|
||||
# Format: (rec_id, title, series_topic)
|
||||
ITU_AI_CATALOG = [
|
||||
# Y-series: Global information infrastructure, cloud computing, AI
|
||||
("Y.3172", "Architectural framework for machine learning in future networks including IMT-2020",
|
||||
"AI/ML architecture"),
|
||||
("Y.3173", "Framework for evaluating intelligence levels of future networks including IMT-2020",
|
||||
"AI/ML architecture"),
|
||||
("Y.3174", "Framework for data handling to enable machine learning in future networks including IMT-2020",
|
||||
"AI/ML architecture"),
|
||||
("Y.3176", "Machine learning marketplace integration in future networks including IMT-2020",
|
||||
"AI/ML architecture"),
|
||||
("Y.3177", "Architectural framework of AI-as-a-Service to enable AI services in future networks",
|
||||
"AI/ML architecture"),
|
||||
("Y.3178", "Requirements and framework of federated machine learning",
|
||||
"Federated learning"),
|
||||
("Y.3179", "Architectural framework for AI-based network automation",
|
||||
"Network automation"),
|
||||
("Y.3180", "Framework for multi-domain ML pipeline in future networks",
|
||||
"AI/ML architecture"),
|
||||
("Y.3181", "Architectural framework for trustworthy networking based on machine learning technology",
|
||||
"Trustworthy AI"),
|
||||
("Y.3530", "Cloud computing — Overview of machine learning in future networks",
|
||||
"Cloud AI"),
|
||||
("Y.3531", "Cloud computing — Functional architecture for machine learning as a service",
|
||||
"Cloud AI"),
|
||||
("Y.4464", "Framework for IoT-area network using autonomous agents",
|
||||
"IoT agents"),
|
||||
("Y.4907", "Reference architecture for intelligent transportation systems communication network using AI",
|
||||
"AI transport"),
|
||||
# X-series: Security
|
||||
("X.1381", "Framework for AI risk assessment in telecommunication networks",
|
||||
"AI security"),
|
||||
("X.1382", "Security requirements for AI-based solutions in telecommunication networks",
|
||||
"AI security"),
|
||||
("X.1383", "Assessment criteria for trustworthiness of AI-based telecommunication services",
|
||||
"Trustworthy AI"),
|
||||
("X.1384", "Security threats and risk treatment measures for AI-based telecommunication systems",
|
||||
"AI security"),
|
||||
# Focus Group deliverables (FG-AI4H etc.)
|
||||
("FG-AI4H DEL01", "AI for Health: Ethics and Governance",
|
||||
"AI health"),
|
||||
("FG-AI4H DEL02", "AI for Health: Data handling",
|
||||
"AI health"),
|
||||
("FG-AI4H DEL7.2", "AI for Health: Clinical evaluation of AI",
|
||||
"AI health"),
|
||||
]
|
||||
|
||||
ITU_REC_BASE = "https://www.itu.int/rec/T-REC-"
|
||||
ITU_HANDLE_BASE = "https://handle.itu.int/11.1002/1000"
|
||||
|
||||
|
||||
def _rec_to_name(rec_id: str) -> str:
|
||||
"""Convert ITU-T rec ID to slug. E.g. 'Y.3172' -> 'itu-t-y-3172'."""
|
||||
slug = rec_id.lower().replace(".", "-").replace(" ", "-").replace("/", "-")
|
||||
return f"itu-t-{slug}"
|
||||
|
||||
|
||||
def _rec_to_url(rec_id: str) -> str:
|
||||
"""Best-effort URL for an ITU-T recommendation."""
|
||||
if rec_id.startswith("FG-"):
|
||||
# Focus group deliverables have different URL patterns
|
||||
return f"https://www.itu.int/en/ITU-T/focusgroups/{rec_id.split()[0].lower()}/Pages/default.aspx"
|
||||
# Standard recommendations: T-REC-Y.3172
|
||||
return f"{ITU_REC_BASE}{rec_id.replace('.', '.')}"
|
||||
|
||||
|
||||
class ITUFetcher:
    """Fetch AI-related specs from ITU-T.

    ITU-T exposes no search API, so a curated catalog is used and then
    enriched, best-effort, by scraping recommendation pages on itu.int.
    """

    def __init__(self, config: Config | None = None):
        # Fall back to the on-disk config when none is injected.
        self.config = config or Config.load()
        self.client = httpx.Client(timeout=30, follow_redirects=True)

    def search(
        self, keywords: list[str], since: str | None = None
    ) -> list[SourceDocument]:
        """Return AI-relevant ITU-T recommendations from curated catalog."""
        console.print(" Loading ITU-T AI recommendation catalog...")
        catalog_docs = [
            SourceDocument(
                name=_rec_to_name(rec_id),
                title=f"ITU-T {rec_id}: {title}",
                abstract=f"ITU-T Recommendation {rec_id} on {topic}: {title}",
                source="itu",
                source_id=rec_id,
                source_url=_rec_to_url(rec_id),
                time="",
                doc_status="published",
                extra={"topic": topic},
            )
            for rec_id, title, topic in ITU_AI_CATALOG
        ]

        # Best-effort enrichment with real summaries/dates from itu.int.
        console.print(f" Fetching metadata for {len(catalog_docs)} ITU-T recommendations...")
        enriched_count = 0
        for doc in catalog_docs:
            if self._enrich_metadata(doc):
                enriched_count += 1
            # Throttle so we don't hammer itu.int.
            time_mod.sleep(0.3)

        console.print(f" Found [bold green]{len(catalog_docs)}[/] ITU-T specs ({enriched_count} with metadata)")
        return catalog_docs

    def _enrich_metadata(self, doc: SourceDocument) -> bool:
        """Scrape the itu.int recommendation page for a real abstract/date.

        Returns True when the page was fetched successfully (status 200),
        even if nothing usable was extracted; focus-group deliverables are
        skipped because their pages have a different structure.
        """
        rec_id = doc.source_id
        if rec_id.startswith("FG-"):
            return False  # Focus groups have different structure

        page_url = f"https://www.itu.int/ITU-T/recommendations/rec.aspx?rec={rec_id}"
        try:
            response = self.client.get(page_url)
        except httpx.HTTPError:
            return False
        if response.status_code != 200:
            return False

        html = response.text
        # Pull the summary/scope paragraph, if the page has one.
        summary_hit = re.search(
            r'(?:Summary|Scope|Abstract)[:\s]*</[^>]+>\s*(.+?)(?:</|<br|<p)',
            html, re.DOTALL | re.IGNORECASE,
        )
        if summary_hit:
            plain = re.sub(r'<[^>]+>', '', summary_hit.group(1)).strip()
            # Only overwrite the catalog abstract with a substantive summary.
            if len(plain) > 50:
                doc.abstract = plain[:2000]

        # Pull the approval/publication date when present.
        date_hit = re.search(
            r'(?:Approved|Published)[:\s]*(\d{4}-\d{2}-\d{2}|\d{4}-\d{2}|\d{4})',
            html, re.IGNORECASE,
        )
        if date_hit:
            doc.time = date_hit.group(1)
        return True

    def download_text(self, doc: SourceDocument) -> str | None:
        """Download a recommendation PDF and extract its text, if possible.

        Returns None for focus-group deliverables, failed downloads, or
        non-PDF responses; a placeholder string when pdfminer is missing.
        """
        rec_id = doc.source_id
        if rec_id.startswith("FG-"):
            return None

        # The dologin_pub endpoint serves the PDF for public recommendations.
        pdf_url = f"https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-{rec_id}&type=items"
        try:
            response = self.client.get(pdf_url)
        except httpx.HTTPError as e:
            console.print(f"[dim]Could not download {doc.name}: {e}[/]")
            return None

        got_pdf = "application/pdf" in response.headers.get("content-type", "")
        if response.status_code != 200 or not got_pdf:
            return None

        try:
            from io import BytesIO
            from pdfminer.high_level import extract_text
        except ImportError:
            return f"[PDF document: {doc.title}. Install pdfminer.six to extract text.]"

        text = extract_text(BytesIO(response.content))
        # Cap stored text to keep the database small.
        return text[:100000] if text else None

    def close(self) -> None:
        """Release the underlying HTTP client."""
        self.client.close()
|
||||
@@ -53,11 +53,26 @@
|
||||
color: #4ade80;
|
||||
border: 1px solid rgba(34, 197, 94, 0.3);
|
||||
}
|
||||
.source-generated {
|
||||
.source-etsi {
|
||||
background: rgba(251, 146, 60, 0.15);
|
||||
color: #fb923c;
|
||||
border: 1px solid rgba(251, 146, 60, 0.3);
|
||||
}
|
||||
.source-itu {
|
||||
background: rgba(244, 114, 182, 0.15);
|
||||
color: #f472b6;
|
||||
border: 1px solid rgba(244, 114, 182, 0.3);
|
||||
}
|
||||
.source-iso {
|
||||
background: rgba(168, 85, 247, 0.15);
|
||||
color: #c084fc;
|
||||
border: 1px solid rgba(168, 85, 247, 0.3);
|
||||
}
|
||||
.source-generated {
|
||||
background: rgba(148, 163, 184, 0.15);
|
||||
color: #94a3b8;
|
||||
border: 1px solid rgba(148, 163, 184, 0.3);
|
||||
}
|
||||
.cat-pill {
|
||||
display: inline-block;
|
||||
padding: 1px 8px;
|
||||
@@ -162,7 +177,9 @@
|
||||
<option value="">All sources</option>
|
||||
<option value="ietf" {% if current_source == 'ietf' %}selected{% endif %}>IETF</option>
|
||||
<option value="w3c" {% if current_source == 'w3c' %}selected{% endif %}>W3C</option>
|
||||
<option value="generated" {% if current_source == 'generated' %}selected{% endif %}>Generated</option>
|
||||
<option value="etsi" {% if current_source == 'etsi' %}selected{% endif %}>ETSI</option>
|
||||
<option value="itu" {% if current_source == 'itu' %}selected{% endif %}>ITU-T</option>
|
||||
<option value="iso" {% if current_source == 'iso' %}selected{% endif %}>ISO/IEC</option>
|
||||
</select>
|
||||
</div>
|
||||
<!-- Sort -->
|
||||
|
||||
Reference in New Issue
Block a user