#!/usr/bin/env python3 """Export interactive HTML visualizations to static images for the paper. Run from the paper/ directory: python export_figures.py Copies PNG figures directly and exports HTML charts to PNG via plotly's kaleido engine. If kaleido is not installed, creates placeholder PDFs instead. """ import shutil from pathlib import Path FIGURES_SRC = Path(__file__).parent.parent / "data" / "figures" FIGURES_DST = Path(__file__).parent / "figures" FIGURES_DST.mkdir(exist_ok=True) # Direct PNG copies (already publication-ready) PNG_FILES = [ "similarity-heatmap.png", "score-distributions.png", ] # HTML charts to export as static PNG (requires kaleido) HTML_EXPORTS = { "timeline.html": "timeline.png", "score-vs-overlap.html": "quality.png", "category-radar.html": "radar.png", "author-network.html": "network.png", "landscape-tsne.html": "landscape-tsne.png", "bubble-explorer.html": "bubble.png", "category-treemap.html": "treemap.png", "org-contributions.html": "orgs.png", } def copy_pngs(): for name in PNG_FILES: src = FIGURES_SRC / name if src.exists(): shutil.copy2(src, FIGURES_DST / name) print(f" Copied {name}") else: print(f" MISSING {name}") def export_html_charts(): try: import plotly.io as pio from plotly.io import read_json except ImportError: print(" plotly not available, skipping HTML exports") return try: # Test if kaleido is available import kaleido has_kaleido = True except ImportError: has_kaleido = False print(" kaleido not installed (pip install kaleido)") print(" To get static PNGs from HTML charts, install kaleido and re-run.") print(" For now, creating placeholder instructions.\n") if not has_kaleido: # Write instructions for manual export instructions = FIGURES_DST / "EXPORT_INSTRUCTIONS.md" instructions.write_text( "# Manual Figure Export\n\n" "Install kaleido for automatic export:\n" " pip install kaleido\n\n" "Or open each HTML file in a browser and use the Plotly toolbar\n" "(camera icon) to save as PNG.\n\n" "Required files:\n" + "".join(f"- {v}\n" for v in HTML_EXPORTS.values()) ) print(f" Wrote {instructions}") return for html_name, png_name in HTML_EXPORTS.items(): src = FIGURES_SRC / html_name if not src.exists(): print(f" MISSING {html_name}") continue try: # Read the HTML, extract the plotly figure JSON, render to PNG html_content = src.read_text() # Extract Plotly JSON from the HTML import json import re match = re.search(r'Plotly\.newPlot\(\s*"[^"]*"\s*,\s*(\[.*?\])\s*,\s*(\{.*?\})\s*,\s*\{', html_content, re.DOTALL) if match: data = json.loads(match.group(1)) layout = json.loads(match.group(2)) import plotly.graph_objects as go fig = go.Figure(data=data, layout=layout) fig.write_image(str(FIGURES_DST / png_name), scale=2, width=1200, height=800) print(f" Exported {html_name} -> {png_name}") else: print(f" Could not parse Plotly JSON from {html_name}") except Exception as e: print(f" Failed {html_name}: {e}") def create_placeholder_pdfs(): """Create minimal placeholder PDFs for figures that haven't been exported yet.""" placeholders = [ "timeline-placeholder.pdf", "quality-placeholder.pdf", "radar-placeholder.pdf", "network-placeholder.pdf", ] try: import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt for name in placeholders: fig, ax = plt.subplots(figsize=(10, 6)) ax.text(0.5, 0.5, f"[{name.replace('-placeholder.pdf', '').upper()}]\n\n" "Replace with exported figure from\n" "data/figures/ (HTML → PNG/PDF)", ha="center", va="center", fontsize=14, color="gray", transform=ax.transAxes) ax.set_xlim(0, 1) ax.set_ylim(0, 1) ax.axis("off") fig.savefig(str(FIGURES_DST / name), bbox_inches="tight") plt.close(fig) print(f" Created placeholder: {name}") except Exception as e: print(f" Could not create placeholders: {e}") if __name__ == "__main__": print("Copying PNG figures...") copy_pngs() print("\nExporting HTML charts...") export_html_charts() print("\nCreating placeholder PDFs...") create_placeholder_pdfs() print("\nDone. Check paper/figures/ for outputs.")