From 58315ac98208c1480091bc737f2aad92da5777fd Mon Sep 17 00:00:00 2001 From: Christian Nennemann Date: Wed, 8 Apr 2026 05:13:59 +0200 Subject: [PATCH] =?UTF-8?q?docs:=20add=20taxonomy=20paper=20=E2=80=94=20PM?= =?UTF-8?q?/OM=20methods=20for=20agent=20orchestration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Survey of 13 operations management methods (PDCA, Scrum, DMAIC, Kanban, TOC, Lean, OODA, Cynefin, Stage-Gate, Design Thinking, TRIZ, FMEA, SPC) evaluated against 5 agent constraints. Includes compatibility matrix and decision framework. --- paper/taxonomy-refs.bib | 144 ++++++++ paper/taxonomy.tex | 783 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 927 insertions(+) create mode 100644 paper/taxonomy-refs.bib create mode 100644 paper/taxonomy.tex diff --git a/paper/taxonomy-refs.bib b/paper/taxonomy-refs.bib new file mode 100644 index 0000000..a4bc109 --- /dev/null +++ b/paper/taxonomy-refs.bib @@ -0,0 +1,144 @@ +% ---- Agent Frameworks ---- + +@article{hong2024metagpt, + title={{MetaGPT}: Meta Programming for A Multi-Agent Collaborative Framework}, + author={Hong, Sirui and Zhuge, Mingchen and Chen, Jonathan and Zheng, Xiawu and Cheng, Yuheng and Zhang, Ceyao and Wang, Jinlin and Wang, Zili and Yau, Steven Ka Shing and Lin, Zijuan and Zhou, Liyang and Ran, Chenyu and Xiao, Lingfeng and Wu, Chenglin and Schmidhuber, J{\"u}rgen}, + journal={arXiv preprint arXiv:2308.00352}, + year={2024}, + url={https://arxiv.org/abs/2308.00352} +} + +@article{qian2024chatdev, + title={{ChatDev}: Communicative Agents for Software Development}, + author={Qian, Chen and Liu, Wei and Liu, Hongzhang and Chen, Nuo and Dang, Yufan and Li, Jiahao and Yang, Cheng and Chen, Weize and Su, Yusheng and Cong, Xin and Xu, Juyuan and Li, Dahai and Liu, Zhiyuan and Sun, Maosong}, + journal={arXiv preprint arXiv:2307.07924}, + year={2024}, + url={https://arxiv.org/abs/2307.07924} +} + +@article{wu2023autogen, + title={{AutoGen}: Enabling
Next-Gen {LLM} Applications via Multi-Agent Conversation}, + author={Wu, Qingyun and Bansal, Gagan and Zhang, Jieyu and Wu, Yiran and Li, Beibin and Zhu, Erkang and Jiang, Li and Zhang, Xiaoyun and Zhang, Shaokun and Liu, Jiale and Awadallah, Ahmed Hassan and White, Ryen W. and Burger, Doug and Wang, Chi}, + journal={arXiv preprint arXiv:2308.08155}, + year={2023}, + url={https://arxiv.org/abs/2308.08155} +} + +@article{yang2024sweagent, + title={{SWE-agent}: Agent-Computer Interfaces Enable Automated Software Engineering}, + author={Yang, John and Jimenez, Carlos E. and Wettig, Alexander and Liber, Kilian and Narasimhan, Karthik and Press, Ofir}, + journal={arXiv preprint arXiv:2405.15793}, + year={2024}, + url={https://arxiv.org/abs/2405.15793} +} + +@article{nennemann2026archeflow, + title={{ArcheFlow}: Multi-Agent Orchestration with Archetypal Roles and {PDCA} Quality Cycles}, + author={Nennemann, Christian}, + journal={arXiv preprint}, + year={2026}, + url={https://github.com/XORwell/archeflow} +} + +% ---- Persona Stability ---- + +@article{lu2026assistant, + title={The Assistant Axis: Situating and Stabilizing the Default Persona of Language Models}, + author={Lu, Christina and Gallagher, Jack and Michala, Jonathan and Fish, Kyle and Lindsey, Jack}, + journal={arXiv preprint arXiv:2601.10387}, + year={2026}, + url={https://arxiv.org/abs/2601.10387} +} + +% ---- PM/OM Foundations ---- + +@book{deming1986out, + title={Out of the Crisis}, + author={Deming, W. Edwards}, + year={1986}, + publisher={MIT Press}, + address={Cambridge, MA} +} + +@book{shewhart1939statistical, + title={Statistical Method from the Viewpoint of Quality Control}, + author={Shewhart, Walter Andrew}, + year={1939}, + publisher={Graduate School of the Department of Agriculture}, + address={Washington, DC} +} + +@book{goldratt1984goal, + title={The Goal: A Process of Ongoing Improvement}, + author={Goldratt, Eliyahu M.
and Cox, Jeff}, + year={1984}, + publisher={North River Press}, + address={Great Barrington, MA} +} + +@book{ohno1988toyota, + title={{Toyota} Production System: Beyond Large-Scale Production}, + author={Ohno, Taiichi}, + year={1988}, + publisher={Productivity Press}, + address={Portland, OR} +} + +@book{womack1996lean, + title={Lean Thinking: Banish Waste and Create Wealth in Your Corporation}, + author={Womack, James P. and Jones, Daniel T.}, + year={1996}, + publisher={Simon \& Schuster}, + address={New York} +} + +@article{cooper1990stagegate, + title={{Stage-Gate} Systems: A New Tool for Managing New Products}, + author={Cooper, Robert G.}, + journal={Business Horizons}, + volume={33}, + number={3}, + pages={44--54}, + year={1990}, + publisher={Elsevier} +} + +@article{snowden2007cynefin, + title={A Leader's Framework for Decision Making}, + author={Snowden, David J. and Boone, Mary E.}, + journal={Harvard Business Review}, + volume={85}, + number={11}, + pages={68--76}, + year={2007} +} + +@book{altshuller1999innovation, + title={The Innovation Algorithm: {TRIZ}, Systematic Innovation and Technical Creativity}, + author={Altshuller, Genrich}, + year={1999}, + publisher={Technical Innovation Center}, + address={Worcester, MA} +} + +@unpublished{boyd1976destruction, + title={Destruction and Creation}, + author={Boyd, John R.}, + year={1976}, + note={Unpublished manuscript, widely circulated} +} + +@book{schwaber2020scrum, + title={The Scrum Guide}, + author={Schwaber, Ken and Sutherland, Jeff}, + year={2020}, + publisher={Scrum.org}, + url={https://scrumguides.org} +} + +@techreport{mil1949fmea, + title={{MIL-P-1629}: Procedures for Performing a Failure Mode, Effects and Criticality Analysis}, + institution={United States Department of Defense}, + year={1949}, + note={Revised as MIL-STD-1629A, 1980} +} diff --git a/paper/taxonomy.tex b/paper/taxonomy.tex new file mode 100644 index 0000000..d8d92c3 --- /dev/null +++ b/paper/taxonomy.tex @@ -0,0 +1,783 @@
+\documentclass[11pt,a4paper]{article} + +% ---- Packages ---- +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{amsmath,amssymb} +\usepackage{graphicx} +\usepackage{booktabs} +\usepackage{hyperref} +\usepackage{xcolor} +\usepackage{listings} +\usepackage{subcaption} +\usepackage{tikz} +\usetikzlibrary{shapes,arrows.meta,positioning,fit,calc,matrix} +\usepackage[numbers]{natbib} +\usepackage{geometry} +\usepackage{enumitem} +\geometry{margin=1in} + +% ---- Colors ---- +\definecolor{highfit}{HTML}{2E7D32} +\definecolor{medfit}{HTML}{F57F17} +\definecolor{lowfit}{HTML}{C62828} +\definecolor{neutral}{HTML}{546E7A} + +% ---- Title ---- +\title{% + From Factory Floor to Token Stream:\\ + A Taxonomy of Operations Management Methods\\ + for LLM Agent Orchestration% +} + +\author{ + Christian Nennemann\\ + Independent Researcher\\ + \texttt{chris@nennemann.de} +} + +\date{April 2026} + +\begin{document} +\maketitle + +% ============================================================ +\begin{abstract} +Multi-agent systems built on large language models (LLMs) increasingly adopt +metaphors from human project management---sprints, standups, code review---yet +draw from a remarkably narrow slice of the operations management literature. +This paper presents a systematic taxonomy of thirteen established PM/OM methods, +evaluates their structural compatibility with LLM agent constraints (stateless +invocation, cheap cloning, deterministic dysfunction, absence of human +psychology, fast cycle times), and identifies which methods are underexploited, which are +inapplicable, and which require fundamental adaptation. We find that methods +designed for \emph{flow optimization} (Kanban, Theory of Constraints) and +\emph{rapid decision-making} (OODA Loop) are structurally well-suited to +agent orchestration but remain largely unexplored, while methods centered on +\emph{human psychology} (Scrum ceremonies, Design Thinking empathy phases) +transfer poorly without significant reformulation.
We propose a decision +framework for selecting orchestration methods based on task complexity, agent +count, and quality requirements, and identify five open research directions +at the intersection of operations management and agentic AI. +\end{abstract} + +% ============================================================ +\section{Introduction} +\label{sec:intro} + +The dominant paradigm for multi-agent LLM systems borrows from agile software +development: agents are organized into ``teams'' with role-based +specialization, tasks are decomposed into work items, and results are reviewed +before merging \citep{hong2024metagpt, qian2024chatdev}. This borrowing is +natural---the humans building these systems are software engineers familiar +with agile methods---but it is also narrow. The operations management +literature contains dozens of methods developed over a century of industrial +practice, each encoding different assumptions about workflow structure, quality +assurance, failure modes, and coordination costs. + +Not all of these methods are equally applicable to LLM agents. Agents differ +from human workers in five structurally important ways: + +\begin{enumerate}[label=\textbf{C\arabic*}] + \item \label{c:stateless} \textbf{Stateless invocation}: Agents do not + retain memory between invocations unless explicitly persisted. Human team + members accumulate institutional knowledge automatically. + + \item \label{c:cloning} \textbf{Cheap to clone, expensive to coordinate}: + Spawning a new agent costs milliseconds and cents; coordinating two agents + costs tokens and latency. For human teams, the inverse holds---hiring is + expensive, coordination is (comparatively) cheap. + + \item \label{c:dysfunction} \textbf{Deterministic dysfunction}: LLM agents + fail in predictable, repeatable patterns---verbosity, scope creep, false + positives---rather than the varied, context-dependent failures of human + cognition \citep{nennemann2026archeflow}. 
+ + \item \label{c:psychology} \textbf{No psychology}: Agents have no morale, + fatigue, ego, or office politics. Methods designed to manage human + psychology (retrospectives, team-building, conflict resolution) have no + direct function. + + \item \label{c:speed} \textbf{Cycle speed}: Agents complete tasks in + seconds to minutes, enabling iteration frequencies that would be + impractical for human teams. Methods that assume week-long or month-long + cycles can be compressed. +\end{enumerate} + +These constraints define a \emph{fitness landscape}: some PM/OM methods gain +effectiveness when applied to agents (because agents remove friction those +methods were designed to manage), while others lose their raison d'\^etre +(because they solve human problems agents don't have). + +This paper contributes: +\begin{itemize} + \item A systematic taxonomy of thirteen PM/OM methods evaluated against the + five agent constraints (\ref{c:stateless}--\ref{c:speed}). + \item A compatibility matrix scoring each method's structural fit for + agent orchestration (\S\ref{sec:matrix}). + \item A decision framework for practitioners selecting orchestration + strategies (\S\ref{sec:decision}). + \item Five open research directions at the intersection of operations + management theory and agentic AI (\S\ref{sec:future}). +\end{itemize} + +% ============================================================ +\section{Background: Current Agent Orchestration Landscape} +\label{sec:background} + +\subsection{Frameworks and Their Implicit PM Models} + +The current generation of multi-agent LLM frameworks implicitly adopts +project management concepts, though rarely with explicit attribution to +PM/OM theory. + +\textbf{MetaGPT} \citep{hong2024metagpt} assigns human job titles (product +manager, architect, engineer) and enforces communication through Standardized +Operating Procedures (SOPs)---an implicit adoption of \emph{waterfall} +phase gates with role-based access control.
+ +\textbf{ChatDev} \citep{qian2024chatdev} simulates a software company with +sequential phases (design, coding, testing, documentation). Despite the +``company'' framing, the execution model is a \emph{linear pipeline} with +pair-programming-style chat between adjacent roles. + +\textbf{CrewAI} organizes agents into ``crews'' with a ``manager'' agent +orchestrating task delegation---an implicit \emph{hierarchical management} +model with single-point-of-failure coordination. + +\textbf{AutoGen} \citep{wu2023autogen} provides a conversation-based +framework where agents negotiate through multi-turn dialogue. The implicit +model is \emph{committee decision-making}---all agents see all messages, +consensus emerges through discussion. + +\textbf{ArcheFlow} \citep{nennemann2026archeflow} explicitly applies PDCA +quality cycles with Jungian archetypal roles, representing the first +framework to deliberately adopt a named PM/OM methodology with formal +convergence criteria. + +\subsection{The Gap} + +Despite the variety of frameworks, the PM/OM methods actually employed +cluster tightly around three approaches: (1) waterfall-style sequential +phases, (2) role-based team simulation, and (3) informal ``manager'' +delegation. Methods from lean manufacturing, statistical process control, +military decision-making, innovation management, and constraint theory +remain entirely unexplored in the agent orchestration literature. + +% ============================================================ +\section{Taxonomy of PM/OM Methods} +\label{sec:taxonomy} + +We evaluate thirteen methods spanning five categories: iterative improvement, +flow optimization, decision-making, innovation management, and quality +engineering. For each method, we describe the core mechanism, evaluate +structural compatibility with agent constraints \ref{c:stateless}--\ref{c:speed}, +identify the primary adaptation required, and assess overall fitness.
+ +% ---- 3.1 Iterative Improvement ---- +\subsection{Iterative Improvement Methods} + +\subsubsection{PDCA (Plan--Do--Check--Act)} +\label{sec:pdca} + +\textbf{Origin}: Shewhart \citep{shewhart1939statistical}, popularized by +Deming \citep{deming1986out}. + +\textbf{Mechanism}: Four-phase cycle repeated until quality targets are met. +Each cycle narrows the gap between current and desired state through +structured feedback. + +\textbf{Agent fitness}: \textsc{High}. PDCA's phase structure maps directly +to agent orchestration: Plan (research + design agents), Do (implementation +agent), Check (review agents), Act (routing + merge decisions). The cycle +abstraction handles the core challenge of ``when to stop iterating'' through +convergence metrics. Demonstrated in ArcheFlow \citep{nennemann2026archeflow}. + +\textbf{Key adaptation}: Convergence detection must be automated (human PDCA +relies on subjective judgment). ArcheFlow addresses this with a convergence +score based on finding classification (new, resolved, persistent, regressed) +and oscillation detection. + +\textbf{Constraint fit}: Stateless (\ref{c:stateless})---artifacts persist +state between cycles. Cloning (\ref{c:cloning})---fresh agents per cycle +avoid accumulated bias. Speed (\ref{c:speed})---cycles complete in minutes, +enabling 2--3 cycles where humans would manage one. + +\subsubsection{Scrum} +\label{sec:scrum} + +\textbf{Origin}: Schwaber \& Sutherland, 1995. + +\textbf{Mechanism}: Time-boxed sprints with defined roles (Product Owner, +Scrum Master, Development Team), ceremonies (planning, daily standup, +review, retrospective), and artifacts (backlog, sprint board, burndown). + +\textbf{Agent fitness}: \textsc{Low--Medium}. 
Scrum's ceremony-heavy +structure exists primarily to manage human coordination challenges: standups +maintain shared awareness (agents can share a filesystem), retrospectives +address interpersonal friction (agents have none), sprint planning negotiates +capacity (agents have deterministic throughput). The useful kernel---time-boxed +work with a prioritized backlog---is trivially implementable without Scrum's +overhead. + +\textbf{Key adaptation}: Strip ceremonies, keep the backlog + sprint +structure. ``Daily standups'' become status file reads. ``Retrospectives'' +become cross-run memory extraction. The Scrum Master role is pure overhead +for agents. + +\textbf{Constraint fit}: Psychology (\ref{c:psychology})---most Scrum +ceremonies solve human problems. Speed (\ref{c:speed})---sprint length +compresses from weeks to minutes. Cloning (\ref{c:cloning})---team +stability (a Scrum value) is irrelevant when agents are stateless. + +\subsubsection{DMAIC (Six Sigma)} +\label{sec:dmaic} + +\textbf{Origin}: Motorola, 1986; systematized by General Electric. + +\textbf{Mechanism}: Define--Measure--Analyze--Improve--Control. Unlike PDCA, +DMAIC emphasizes \emph{statistical measurement} of process capability and +explicitly separates analysis (understanding the problem) from improvement +(fixing it). + +\textbf{Agent fitness}: \textsc{Medium--High}. The Define--Measure--Analyze +front-loading is valuable for agents: it forces explicit quality metrics +\emph{before} implementation, preventing the common failure mode of agents +optimizing for the wrong objective. The Control phase---establishing +monitoring to prevent regression---maps to cross-run memory systems. + +\textbf{Key adaptation}: Agents can compute statistical process control +metrics (defect rates, cycle times, sigma levels) automatically from event +logs. The ``Measure'' phase, which is expensive and tedious for humans, +becomes a strength: agents can instrument everything. 
+ +\textbf{Constraint fit}: Speed (\ref{c:speed})---full DMAIC in minutes. +Dysfunction (\ref{c:dysfunction})---agent failure modes have measurable +baselines, making sigma calculations meaningful. Stateless +(\ref{c:stateless})---Control phase requires persistent monitoring, which +must be explicitly built. + +% ---- 3.2 Flow Optimization ---- +\subsection{Flow Optimization Methods} + +\subsubsection{Kanban} +\label{sec:kanban} + +\textbf{Origin}: Toyota Production System, Taiichi Ohno, 1950s. + +\textbf{Mechanism}: Pull-based workflow with explicit work-in-progress (WIP) +limits. Work items flow through columns (stages); new work is pulled only +when capacity is available. No iterations---continuous flow. + +\textbf{Agent fitness}: \textsc{High}. Kanban's WIP limits directly address +a critical agent challenge: \emph{coordination cost scaling}. Without WIP +limits, spawning more agents increases throughput initially but eventually +degrades quality due to coordination overhead (conflicting changes, merge +conflicts, context fragmentation). Kanban provides a principled mechanism for +determining optimal concurrency. + +\textbf{Key adaptation}: WIP limits should be \emph{dynamic}, adjusting +based on observed coordination costs (merge conflicts, finding duplications) +rather than fixed. The pull mechanism maps naturally: agents poll a task +queue and pull the highest-priority item they can handle. + +\textbf{Constraint fit}: Cloning (\ref{c:cloning})---WIP limits are +\emph{exactly} the missing constraint for cheap-to-clone agents. Speed +(\ref{c:speed})---flow metrics (lead time, cycle time, throughput) update +in real-time. Psychology (\ref{c:psychology})---no ``swarming'' or +``blocked item'' social dynamics to manage. + +\subsubsection{Theory of Constraints (TOC)} +\label{sec:toc} + +\textbf{Origin}: Goldratt, \emph{The Goal}, 1984. 
+ +\textbf{Mechanism}: Identify the system's constraint (bottleneck), exploit +it (maximize its throughput), subordinate everything else to it, elevate it +(invest to remove it), repeat. The Five Focusing Steps. + +\textbf{Agent fitness}: \textsc{High}. In multi-agent pipelines, the +bottleneck is typically the most capable (and expensive) agent: the +implementation agent that must run on a powerful model, or the security +reviewer that requires deep context. TOC provides a framework for +organizing the entire pipeline around this constraint. + +\textbf{Key adaptation}: ``Exploit the constraint'' means ensuring the +bottleneck agent never waits for input. Pre-compute its context, batch +its inputs, and schedule cheaper agents (research, formatting, validation) +to run during its processing time. ``Subordinate'' means cheaper agents +should produce output in the format the bottleneck needs, not in whatever +format is easiest for them. + +\textbf{Constraint fit}: Cloning (\ref{c:cloning})---non-bottleneck agents +are cheap to overprovision. Speed (\ref{c:speed})---constraint shifts can +be detected and responded to within a single run. Dysfunction +(\ref{c:dysfunction})---bottleneck agent's failure mode has outsized impact, +justifying targeted shadow detection. + +\subsubsection{Lean / Toyota Production System} +\label{sec:lean} + +\textbf{Origin}: Ohno, 1988; Womack \& Jones, 1996. + +\textbf{Mechanism}: Eliminate waste (\emph{muda}), reduce variability +(\emph{mura}), avoid overburden (\emph{muri}). Seven wastes: overproduction, +waiting, transport, overprocessing, inventory, motion, defects. + +\textbf{Agent fitness}: \textsc{Medium--High}. The seven wastes map +surprisingly well to agent systems: + +\begin{itemize}[nosep] + \item \textbf{Overproduction}: Agents generating output nobody reads + (verbose research reports, unused alternative proposals). 
+ \item \textbf{Waiting}: Agents idle while waiting for predecessor output + (sequential pipeline where parallel would work). + \item \textbf{Transport}: Redundant context passing (sending full codebase + to agents that need only a diff). + \item \textbf{Overprocessing}: Running thorough review on trivial changes. + \item \textbf{Inventory}: Accumulated artifacts from prior cycles that + are never referenced. + \item \textbf{Motion}: Agents reading files they don't need, exploring + irrelevant code paths. + \item \textbf{Defects}: Findings that are false positives, requiring + rework to dismiss. +\end{itemize} + +\textbf{Key adaptation}: Lean's ``respect for people'' pillar has no direct +analog. The technical pillar (continuous improvement, waste elimination) +transfers fully. + +% ---- 3.3 Decision-Making ---- +\subsection{Decision-Making Methods} + +\subsubsection{OODA Loop (Observe--Orient--Decide--Act)} +\label{sec:ooda} + +\textbf{Origin}: John Boyd, 1976. Military strategy for air combat; later +generalized to competitive decision-making. + +\textbf{Mechanism}: Continuous loop of Observe (gather data), Orient (analyze +context, update mental models), Decide (select course of action), Act +(execute). The key insight is that the \emph{speed} of the loop---not any +individual decision's quality---determines competitive advantage. ``Getting +inside the opponent's OODA loop'' means acting faster than the adversary can +react. + +\textbf{Agent fitness}: \textsc{High}. OODA is structurally similar to PDCA +but optimized for speed over thoroughness. For agent systems, this maps to +scenarios requiring rapid adaptation: adversarial testing, incident response, +market-reactive coding, or any context where the problem space changes +during execution. + +\textbf{Key adaptation}: Boyd's ``Orient'' phase---updating mental models +based on new information---is the hardest to implement for stateless agents. 
+It requires either persistent state (a world model that updates across +iterations) or a ``fast reorientation'' agent that rapidly synthesizes new +information into an updated context. + +\textbf{Constraint fit}: Speed (\ref{c:speed})---agents can OODA at +superhuman frequency. Stateless (\ref{c:stateless})---the Orient phase +needs explicit state management. Psychology (\ref{c:psychology})---Boyd's +concept of ``mental agility'' translates to model selection: smaller, faster +models for rapid OODA; larger models for deep Orient phases. + +\subsubsection{Cynefin Framework} +\label{sec:cynefin} + +\textbf{Origin}: Snowden \& Boone, 2007. + +\textbf{Mechanism}: Classify problems into five domains---\textsc{Clear} +(obvious cause-effect), \textsc{Complicated} (expert analysis needed), +\textsc{Complex} (emergent, probe-sense-respond), \textsc{Chaotic} +(act first, then sense), \textsc{Confused} (unknown domain)---and apply +domain-appropriate strategies. + +\textbf{Agent fitness}: \textsc{Medium--High}. Cynefin provides a +\emph{meta-framework}: instead of choosing one orchestration method for all +tasks, classify the task first, then select the appropriate method: + +\begin{itemize}[nosep] + \item \textsc{Clear}: Single agent, no review (``fix this typo''). + \item \textsc{Complicated}: Expert agent with review (PDCA fast workflow). + \item \textsc{Complex}: Multiple competing proposals, let results emerge + (PDCA standard/thorough with parallel alternatives). + \item \textsc{Chaotic}: Act immediately, stabilize, then analyze (OODA + with hotfix agent, then PDCA for proper fix). +\end{itemize} + +\textbf{Key adaptation}: Task classification must be automated. Proxies: +number of files affected, cross-module dependencies, security sensitivity, +test coverage of affected area. + +% ---- 3.4 Innovation Management ---- +\subsection{Innovation Management Methods} + +\subsubsection{Stage-Gate} +\label{sec:stagegate} + +\textbf{Origin}: Cooper, 1990. 
+ +\textbf{Mechanism}: Innovation projects pass through stages (scoping, +business case, development, testing, launch), separated by gates where a +cross-functional team decides: Go, Kill, Hold, or Recycle. The gate +decision is explicit and categorical---no ``continue with reservations.'' + +\textbf{Agent fitness}: \textsc{Medium}. The gate mechanism maps well to +agent confidence checks: a Creator agent's proposal either meets the +confidence threshold (Go) or doesn't (Kill/Recycle). However, Stage-Gate +assumes expensive stages (weeks/months of human work), making Kill decisions +high-stakes. For agents, stages are cheap (minutes), reducing the value of +formal gate decisions. + +\textbf{Key adaptation}: Gates become lightweight confidence checks rather +than committee reviews. The ``Kill'' decision---rare and painful in human +innovation---should be common and cheap for agents. Explore multiple +proposals in parallel, gate aggressively, continue only the best. + +\subsubsection{Design Thinking} +\label{sec:designthinking} + +\textbf{Origin}: IDEO / Stanford d.school, 2000s. + +\textbf{Mechanism}: Five phases: Empathize (understand the user), +Define (frame the problem), Ideate (generate solutions), Prototype (build +quickly), Test (get feedback). Emphasis on user empathy and divergent +thinking. + +\textbf{Agent fitness}: \textsc{Low}. Design Thinking's core value +proposition---\emph{empathy with users}---is precisely what LLM agents +cannot genuinely do. Agents can simulate empathy (generate persona-based +scenarios), but the insight that comes from observing real users in context +has no agent equivalent. The Ideate phase (divergent brainstorming) is +feasible but produces quantity over quality without the ``empathy filter'' +that makes Design Thinking effective. + +\textbf{Key adaptation}: If used, the Empathize phase must be replaced +with explicit user research artifacts (personas, journey maps, interview +transcripts) provided as input.
This transforms Design Thinking from a +discovery method into a synthesis method---fundamentally changing its nature. + +\subsubsection{TRIZ} +\label{sec:triz} + +\textbf{Origin}: Altshuller, 1946--1985. Theory of Inventive Problem +Solving. + +\textbf{Mechanism}: Problems contain contradictions (improving one parameter +worsens another). TRIZ provides a contradiction matrix mapping 39 engineering +parameters to 40 inventive principles. Instead of compromise, TRIZ seeks +solutions that resolve the contradiction. + +\textbf{Agent fitness}: \textsc{Medium}. TRIZ's structured problem-solving +is well-suited to agents: the contradiction matrix is a lookup table, and +agents can systematically apply inventive principles. However, TRIZ requires +\emph{reformulating the problem as a contradiction}---a creative step that +is itself challenging for agents. + +\textbf{Key adaptation}: Provide the contradiction matrix as context. Train +agents to identify the ``improving parameter'' and ``worsening parameter'' +in engineering tasks (e.g., ``improving security worsens performance''). +Use TRIZ principles as a structured brainstorming prompt for the Creator +archetype. + +% ---- 3.5 Quality Engineering ---- +\subsection{Quality Engineering Methods} + +\subsubsection{FMEA (Failure Mode and Effects Analysis)} +\label{sec:fmea} + +\textbf{Origin}: US Military, 1949; adopted by automotive (AIAG) and +aerospace. + +\textbf{Mechanism}: For each component/process step, systematically +enumerate: (1) potential failure modes, (2) effects of each failure, +(3) causes, (4) current controls, (5) risk priority number +(severity $\times$ occurrence $\times$ detection). Address highest-RPN +items first. + +\textbf{Agent fitness}: \textsc{High}. FMEA's systematic enumeration is +exactly what LLM agents excel at: given a design, enumerate everything that +could go wrong, assess severity, and propose mitigations. 
The Risk Priority +Number provides a quantitative framework for prioritizing review effort---more +principled than the common ``CRITICAL/WARNING/INFO'' severity classification. + +\textbf{Key adaptation}: Use FMEA \emph{before} implementation (as part of +the Plan phase) rather than only during review. An FMEA agent analyzes the +Creator's proposal and generates a failure mode table; the Maker then +implements with awareness of high-RPN failure modes; the Guardian validates +that mitigations are in place. + +\textbf{Constraint fit}: Dysfunction (\ref{c:dysfunction})---agents' own +failure modes can be pre-enumerated via FMEA, creating a meta-level +quality system. Cloning (\ref{c:cloning})---FMEA agents are cheap +(analytical, not creative), enabling systematic coverage. + +\subsubsection{Statistical Process Control (SPC)} +\label{sec:spc} + +\textbf{Origin}: Shewhart, 1920s. + +\textbf{Mechanism}: Monitor process outputs over time using control charts. +Distinguish \emph{common cause} variation (inherent to the process) from +\emph{special cause} variation (attributable to specific events). React only +to special causes; reduce common cause variation through process improvement. + +\textbf{Agent fitness}: \textsc{Medium--High}. SPC requires historical data, +which agent orchestration systems naturally generate (event logs, finding +counts, cycle times, token usage). Control charts over agent effectiveness +scores can distinguish between normal variation (``Guardian found 2 issues +this run vs. 1 last run'') and genuine degradation (``Guardian's false +positive rate spiked after a model update''). + +\textbf{Key adaptation}: Sufficient run history is needed to establish +control limits. Early runs operate without SPC; after 10--20 runs, +control limits become meaningful. Model updates reset control limits +(new process = new baseline). 
+ +% ============================================================ +\section{Compatibility Matrix} +\label{sec:matrix} + +Table~\ref{tab:matrix} scores each method against the five agent constraints, +producing an overall fitness assessment. + +\begin{table}[t] +\centering +\small +\caption{Compatibility matrix: PM/OM methods scored against agent constraints. +\textcolor{highfit}{\textbf{+}} = method benefits from this constraint; +\textcolor{lowfit}{\textbf{--}} = method is undermined; +\textcolor{neutral}{\textbf{0}} = neutral. +Overall fitness: H = High, M = Medium, L = Low.} +\label{tab:matrix} +\begin{tabular}{@{}l*{5}{c}c@{}} +\toprule +\textbf{Method} & +\textbf{C1} & +\textbf{C2} & +\textbf{C3} & +\textbf{C4} & +\textbf{C5} & +\textbf{Fit} \\ +\midrule +PDCA & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textcolor{highfit}{+} & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textbf{H} \\ +Scrum & \textcolor{lowfit}{--} & \textcolor{neutral}{0} & \textcolor{neutral}{0} & \textcolor{lowfit}{--} & \textcolor{highfit}{+} & \textbf{L--M} \\ +DMAIC & \textcolor{lowfit}{--} & \textcolor{highfit}{+} & \textcolor{highfit}{+} & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textbf{M--H} \\ +Kanban & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textcolor{highfit}{+} & \textbf{H} \\ +TOC & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textcolor{highfit}{+} & \textcolor{highfit}{+} & \textcolor{highfit}{+} & \textbf{H} \\ +Lean & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textcolor{neutral}{0} & \textcolor{lowfit}{--} & \textcolor{highfit}{+} & \textbf{M--H} \\ +OODA & \textcolor{lowfit}{--} & \textcolor{highfit}{+} & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textcolor{highfit}{+} & \textbf{H} \\ +Cynefin & \textcolor{neutral}{0} & \textcolor{neutral}{0} & \textcolor{neutral}{0} & \textcolor{neutral}{0} & \textcolor{neutral}{0} & \textbf{M--H} \\ +Stage-Gate & 
\textcolor{neutral}{0} & \textcolor{highfit}{+} & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textcolor{lowfit}{--} & \textbf{M} \\ +Design Think. & \textcolor{neutral}{0} & \textcolor{neutral}{0} & \textcolor{neutral}{0} & \textcolor{lowfit}{--} & \textcolor{neutral}{0} & \textbf{L} \\ +TRIZ & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textcolor{neutral}{0} & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textbf{M} \\ +FMEA & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textcolor{highfit}{+} & \textcolor{highfit}{+} & \textcolor{highfit}{+} & \textbf{H} \\ +SPC & \textcolor{lowfit}{--} & \textcolor{neutral}{0} & \textcolor{highfit}{+} & \textcolor{highfit}{+} & \textcolor{highfit}{+} & \textbf{M--H} \\ +\bottomrule +\end{tabular} +\end{table} + +\subsection{Analysis} + +Several patterns emerge from the compatibility matrix: + +\textbf{High-fitness methods share three properties}: they are +\emph{mechanistic} (decisions follow rules, not judgment), \emph{flow-oriented} +(optimize throughput, not team dynamics), and \emph{metric-driven} (quality +is quantified, not discussed). PDCA, Kanban, TOC, OODA, and FMEA all share +this profile. + +\textbf{Low-fitness methods are psychology-dependent}: Scrum and Design +Thinking derive their primary value from managing human cognitive and social +limitations. Without those limitations, the methods become overhead. + +\textbf{The ``Cheap Clone'' constraint is never harmful}: every +method either benefits from or is neutral to the ability to spawn agents +cheaply. This suggests that agent orchestration should generally favor +\emph{parallelism}---run multiple approaches simultaneously, then +select the best result. + +\textbf{``Stateless'' is the most disruptive constraint}: methods that +assume accumulated knowledge (Scrum's team velocity, SPC's control charts, +DMAIC's baseline measurements) require explicit persistence mechanisms that +agents do not provide natively.
+ +% ============================================================ +\section{Hybrid Approaches and Method Composition} +\label{sec:hybrid} + +The methods in our taxonomy are not mutually exclusive. Effective agent +orchestration likely requires combining methods at different levels. + +\subsection{Proposed Three-Layer Architecture} + +\begin{description} + \item[Strategic layer (Cynefin):] Classify the task and select the + appropriate orchestration method. Clear tasks get a single agent; + complicated tasks get PDCA; complex tasks get parallel competing + approaches; chaotic tasks get OODA. + + \item[Operational layer (PDCA/OODA + Kanban):] Execute the selected + method with flow control. Kanban WIP limits prevent coordination + overload. PDCA provides quality convergence for standard tasks; OODA + provides rapid adaptation for time-sensitive tasks. + + \item[Quality layer (FMEA + SPC + TOC):] Monitor execution quality. + FMEA front-loads failure analysis in the Plan phase. SPC monitors + long-term agent effectiveness trends. TOC identifies and optimizes + around bottleneck agents. +\end{description} + +\subsection{ArcheFlow as a Case Study} + +ArcheFlow \citep{nennemann2026archeflow} already implements elements of +this three-layer architecture, though without explicitly naming all methods: + +\begin{itemize}[nosep] + \item \textbf{Strategic}: Workflow selection (fast/standard/thorough) + functions as a simplified Cynefin classification. + \item \textbf{Operational}: PDCA cycles with convergence detection; + sprint mode with WIP-limited parallel dispatch (implicit Kanban). + \item \textbf{Quality}: Shadow detection (behavioral FMEA for agent + failure modes); effectiveness scoring (rudimentary SPC); Guardian + fast-path (TOC---don't waste the bottleneck on clean code). +\end{itemize} + +The gap is in explicit TOC application (identifying and optimizing around +the most expensive agent) and in OODA integration for time-sensitive tasks.
+ +% ============================================================ +\section{Decision Framework} +\label{sec:decision} + +We propose a practitioner-oriented decision framework +(Figure~\ref{fig:decision}): the task's Cynefin classification selects the primary orchestration method, and a set of cross-cutting concerns applies on top of that choice. + +\begin{figure}[h] +\centering +\begin{tikzpicture}[ + box/.style={draw, rounded corners, minimum width=3.5cm, minimum height=0.7cm, font=\small, fill=#1}, + arrow/.style={-{Stealth[length=3mm]}, thick}, +] + +% Decision tree +\node[box=yellow!20] (start) {Task arrives}; +\node[box=orange!15, below=0.8cm of start] (cynefin) {Classify (Cynefin)}; + +\node[box=green!15, below left=1cm and 2cm of cynefin] (clear) {Clear}; +\node[box=green!15, below left=1cm and 0cm of cynefin] (complicated) {Complicated}; +\node[box=blue!10, below right=1cm and 0cm of cynefin] (complex) {Complex}; +\node[box=red!10, below right=1cm and 2cm of cynefin] (chaotic) {Chaotic}; + +\node[box=white, below=0.7cm of clear, text width=2.5cm, align=center, font=\scriptsize] (m1) {Single agent\\No review}; +\node[box=white, below=0.7cm of complicated, text width=2.5cm, align=center, font=\scriptsize] (m2) {PDCA fast\\+ FMEA}; +\node[box=white, below=0.7cm of complex, text width=2.5cm, align=center, font=\scriptsize] (m3) {PDCA thorough\\+ parallel proposals}; +\node[box=white, below=0.7cm of chaotic, text width=2.5cm, align=center, font=\scriptsize] (m4) {OODA\\then PDCA}; + +\draw[arrow] (start) -- (cynefin); +\draw[arrow] (cynefin) -- (clear); +\draw[arrow] (cynefin) -- (complicated); +\draw[arrow] (cynefin) -- (complex); +\draw[arrow] (cynefin) -- (chaotic); +\draw[arrow] (clear) -- (m1); +\draw[arrow] (complicated) -- (m2); +\draw[arrow] (complex) -- (m3); +\draw[arrow] (chaotic) -- (m4); + +\end{tikzpicture} +\caption{Decision framework for selecting agent orchestration method +based on Cynefin task classification.} +\label{fig:decision} +\end{figure} + +\textbf{Cross-cutting concerns} apply regardless of classification: +\begin{itemize}[nosep] + \item
\textbf{Kanban WIP limits}: Always. Prevents coordination overload. + \item \textbf{TOC awareness}: Identify the costliest agent; schedule + others around it. + \item \textbf{SPC monitoring}: After 10+ runs, establish control limits + for agent effectiveness. + \item \textbf{Lean waste audit}: Periodically review token usage patterns + for waste (unused artifacts, redundant context, overprocessing). +\end{itemize} + +% ============================================================ +\section{Open Research Directions} +\label{sec:future} + +\subsection{Adaptive Method Selection} + +Current frameworks use a fixed orchestration method. An adaptive system +would classify each incoming task (Cynefin), select the appropriate method, +and switch methods mid-execution if the task's nature changes (e.g., +a ``complicated'' task reveals unexpected complexity during exploration). +This requires a \emph{method-aware orchestrator} that understands the +assumptions and exit criteria of each method. + +\subsection{Kanban for Agent Swarms} + +As agent counts increase beyond 5--10, coordination costs dominate. +Kanban's WIP limits and flow metrics provide a theoretical basis for +determining optimal agent concurrency, but empirical studies are needed +to establish how coordination cost scales with agent count across +different task types and model capabilities. + +\subsection{OODA for Adversarial Agent Scenarios} + +Boyd's OODA loop was designed for competitive environments where speed of +decision-making determines the winner. Applications include adversarial +testing (red team agents vs.\ blue team agents), competitive code generation +(multiple agents racing to solve a problem), and incident response +(rapid diagnosis and mitigation under time pressure). + +\subsection{Cross-Method Quality Metrics} + +Each PM/OM method defines quality differently: PDCA uses convergence scores, +Six Sigma uses sigma levels, Lean uses waste ratios, SPC uses control +limits.
A unified quality metric for agent orchestration---one that allows +meaningful comparison across methods---does not yet exist. + +\subsection{FMEA for Agent Failure Modes} + +Agent failure modes (hallucination, scope creep, false positive reviews, +persona drift \citep{lu2026assistant}) can be systematically enumerated +using FMEA methodology. A comprehensive FMEA catalog for LLM agents---with +severity, occurrence, and detection ratings calibrated from empirical +data---would provide a foundation for designing more robust orchestration +systems. + +% ============================================================ +\section{Conclusion} +\label{sec:conclusion} + +The operations management literature offers a rich toolkit for agent +orchestration that extends far beyond the agile methods currently dominant +in the field. Our taxonomy reveals that the highest-fitness methods---PDCA, +Kanban, TOC, OODA, and FMEA---share a common profile: mechanistic, +flow-oriented, and metric-driven. Methods centered on human psychology +(Scrum, Design Thinking) transfer poorly without fundamental reformulation. + +The key insight is that LLM agents are not ``fast humans.'' They have +fundamentally different constraint profiles---cheap to clone, expensive to +coordinate, stateless, psychologically inert---and these differences make +some PM/OM methods \emph{more} effective (OODA loops at superhuman speed, +FMEA with exhaustive enumeration) while rendering others irrelevant +(standups without psychology, retrospectives without learning). + +We encourage the agent orchestration community to look beyond agile sprints +and role-playing frameworks toward the broader operations management +tradition. A century of industrial practice has much to teach us about +orchestrating intelligent agents---if we take the time to translate. 
+ +% ============================================================ +\section*{Acknowledgments} + +The author thanks the operations management and quality engineering +communities whose work, developed over decades for human organizations, +provides the theoretical foundation for this analysis. + +% ============================================================ +\bibliographystyle{plainnat} +\bibliography{taxonomy-refs} + +\end{document}