3176 lines
133 KiB
HTML
3176 lines
133 KiB
HTML
<!DOCTYPE html>
|
||
<html lang="en" class="Internet-Draft">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta content="Common,Latin" name="scripts">
|
||
<meta content="initial-scale=1.0" name="viewport">
|
||
<title>Agent Failure Cascade Prevention and Rollback</title>
|
||
<meta content="Christian Nennemann" name="author">
|
||
<meta content="
|
||
This document defines protocols for preventing agent failures from
|
||
cascading across interconnected autonomous systems and standardized
|
||
mechanisms for real-time rollback of incorrect agent decisions. It
|
||
specifies a circuit breaker protocol with well-defined state
|
||
transitions, failure domain isolation through bulkhead patterns, cascade
|
||
detection via error rate and latency analysis, and a distributed
|
||
rollback coordination protocol that walks the Execution Context Token
|
||
(ECT) DAG backwards to revert agent actions to a known-good state.
|
||
This document absorbs and supersedes the concepts introduced in earlier
|
||
AERR and ATD proposals.
|
||
" name="description">
|
||
<meta content="xml2rfc 3.31.0" name="generator">
|
||
<meta content="cascade prevention" name="keyword">
|
||
<meta content="circuit breaker" name="keyword">
|
||
<meta content="rollback" name="keyword">
|
||
<meta content="failure domain" name="keyword">
|
||
<meta content="agent recovery" name="keyword">
|
||
<meta content="draft-nennemann-agent-cascade-prevention-00" name="ietf.draft">
|
||
<!-- Generator version information:
|
||
xml2rfc 3.31.0
|
||
Python 3.14.3
|
||
ConfigArgParse 1.7.1
|
||
google-i18n-address 3.1.1
|
||
intervaltree 3.2.1
|
||
Jinja2 3.1.6
|
||
lxml 6.0.2
|
||
platformdirs 4.9.2
|
||
pycountry 26.2.16
|
||
PyYAML 6.0.3
|
||
requests 2.32.5
|
||
wcwidth 0.6.0
|
||
-->
|
||
<link href="draft-nennemann-agent-cascade-prevention-00.xml" rel="alternate" type="application/rfc+xml">
|
||
<link href="#copyright" rel="license">
|
||
<style type="text/css">/*
|
||
|
||
NOTE: Changes at the bottom of this file override some earlier settings.
|
||
|
||
Once the style has stabilized and has been adopted as an official RFC style,
|
||
this can be consolidated so that style settings occur only in one place, but
|
||
for now the contents of this file consist first of the initial CSS work as
|
||
provided to the RFC Formatter (xml2rfc) work, followed by itemized and
|
||
commented changes found necessary during the development of the v3
|
||
formatters.
|
||
|
||
*/
|
||
|
||
/* fonts */
|
||
@import url('https://static.ietf.org/fonts/noto-sans/import.css'); /* Sans-serif */
|
||
@import url('https://static.ietf.org/fonts/noto-serif/import.css'); /* Serif (print) */
|
||
@import url('https://static.ietf.org/fonts/roboto-mono/import.css'); /* Monospace */
|
||
|
||
:root {
|
||
--font-sans: 'Noto Sans', Arial, Helvetica, sans-serif;
|
||
--font-serif: 'Noto Serif', 'Times', 'Times New Roman', serif;
|
||
--font-mono: 'Roboto Mono', Courier, 'Courier New', monospace;
|
||
}
|
||
|
||
@viewport {
|
||
zoom: 1.0;
|
||
}
|
||
@-ms-viewport {
|
||
width: extend-to-zoom;
|
||
zoom: 1.0;
|
||
}
|
||
/* general and mobile first */
|
||
html {
  /* scroll-behavior only governs viewport scrolling (e.g. jumps to in-page
     anchors) when it is set on the root element; a value on body is not
     propagated to the viewport, so it is declared here instead. */
  scroll-behavior: smooth;
}
/* Base page box and typography (mobile-first defaults). */
body {
  max-width: 90%;
  margin: 1.5em auto;
  color: #222;
  background-color: #fff;
  font-size: 14px;
  font-family: var(--font-sans);
  line-height: 1.6;
  overflow-wrap: break-word;
}
|
||
.ears {
|
||
display: none;
|
||
}
|
||
|
||
/* headings */
|
||
#title, h1, h2, h3, h4, h5, h6 {
|
||
margin: 1em 0 0.5em;
|
||
font-weight: bold;
|
||
line-height: 1.3;
|
||
}
|
||
#title {
|
||
clear: both;
|
||
border-bottom: 1px solid #ddd;
|
||
margin: 0 0 0.5em 0;
|
||
padding: 1em 0 0.5em;
|
||
}
|
||
.author {
|
||
padding-bottom: 4px;
|
||
}
|
||
h1 {
|
||
font-size: 26px;
|
||
margin: 1em 0;
|
||
}
|
||
h2 {
|
||
font-size: 22px;
|
||
margin-top: -20px; /* provide offset for in-page anchors */
|
||
padding-top: 33px;
|
||
}
|
||
h3 {
|
||
font-size: 18px;
|
||
margin-top: -36px; /* provide offset for in-page anchors */
|
||
padding-top: 42px;
|
||
}
|
||
h4 {
|
||
font-size: 16px;
|
||
margin-top: -36px; /* provide offset for in-page anchors */
|
||
padding-top: 42px;
|
||
}
|
||
h5, h6 {
|
||
font-size: 14px;
|
||
}
|
||
#n-copyright-notice {
|
||
border-bottom: 1px solid #ddd;
|
||
padding-bottom: 1em;
|
||
margin-bottom: 1em;
|
||
}
|
||
/* general structure */
|
||
p {
|
||
padding: 0;
|
||
margin: 0 0 1em 0;
|
||
text-align: left;
|
||
}
|
||
div, span {
|
||
position: relative;
|
||
}
|
||
div {
|
||
margin: 0;
|
||
}
|
||
.alignRight.art-text {
|
||
background-color: #f9f9f9;
|
||
border: 1px solid #eee;
|
||
border-radius: 3px;
|
||
padding: 1em 1em 0;
|
||
margin-bottom: 1.5em;
|
||
}
|
||
.alignRight.art-text pre {
|
||
padding: 0;
|
||
}
|
||
.alignRight {
|
||
margin: 1em 0;
|
||
}
|
||
.alignRight > *:first-child {
|
||
border: none;
|
||
margin: 0;
|
||
float: right;
|
||
clear: both;
|
||
}
|
||
.alignRight > *:nth-child(2) {
|
||
clear: both;
|
||
display: block;
|
||
border: none;
|
||
}
|
||
svg {
|
||
display: block;
|
||
}
|
||
@media print {
|
||
svg {
|
||
max-height: 850px;
|
||
max-width: 660px;
|
||
}
|
||
}
|
||
svg[font-family~="serif" i], svg [font-family~="serif" i] {
|
||
font-family: var(--font-serif);
|
||
}
|
||
svg[font-family~="sans-serif" i], svg [font-family~="sans-serif" i] {
|
||
font-family: var(--font-sans);
|
||
}
|
||
svg[font-family~="monospace" i], svg [font-family~="monospace" i] {
|
||
font-family: var(--font-mono);
|
||
}
|
||
.alignCenter.art-text {
|
||
background-color: #f9f9f9;
|
||
border: 1px solid #eee;
|
||
border-radius: 3px;
|
||
padding: 1em 1em 0;
|
||
margin-bottom: 1.5em;
|
||
}
|
||
.alignCenter.art-text pre {
|
||
padding: 0;
|
||
}
|
||
.alignCenter {
|
||
margin: 1em 0;
|
||
}
|
||
.alignCenter > *:first-child {
|
||
display: table;
|
||
border: none;
|
||
margin: 0 auto;
|
||
}
|
||
|
||
/* lists */
|
||
ol, ul {
|
||
padding: 0;
|
||
margin: 0 0 1em 2em;
|
||
}
|
||
ol ol, ul ul, ol ul, ul ol {
|
||
margin-left: 1em;
|
||
}
|
||
li {
|
||
margin: 0 0 0.25em 0;
|
||
}
|
||
.ulCompact li {
|
||
margin: 0;
|
||
}
|
||
ul.empty, .ulEmpty {
|
||
list-style-type: none;
|
||
}
|
||
ul.empty li, .ulEmpty li {
|
||
margin-top: 0.5em;
|
||
}
|
||
ul.ulBare, li.ulBare {
|
||
margin-left: 0em !important;
|
||
}
|
||
ul.compact, .ulCompact,
|
||
ol.compact, .olCompact {
|
||
line-height: 100%;
|
||
margin: 0 0 0 2em;
|
||
}
|
||
|
||
/* definition lists */
|
||
dl {
|
||
}
|
||
dl > dt {
|
||
float: left;
|
||
margin-right: 1em;
|
||
}
|
||
/*
|
||
dl.nohang > dt {
|
||
float: none;
|
||
}
|
||
*/
|
||
dl > dd {
|
||
margin-bottom: .8em;
|
||
min-height: 1.3em;
|
||
}
|
||
dl.compact > dd, .dlCompact > dd {
|
||
margin-bottom: 0em;
|
||
}
|
||
dl > dd > dl {
|
||
margin-top: 0.5em;
|
||
margin-bottom: 0em;
|
||
}
|
||
|
||
/* links */
|
||
a {
|
||
text-decoration: none;
|
||
}
|
||
a[href] {
|
||
color: #22e; /* Arlen: WCAG 2019 */
|
||
}
|
||
a[href]:hover {
|
||
background-color: #f2f2f2;
|
||
}
|
||
figcaption a[href],
|
||
a[href].selfRef {
|
||
color: #222;
|
||
}
|
||
/* XXX probably not this:
|
||
a.selfRef:hover {
|
||
background-color: transparent;
|
||
cursor: default;
|
||
} */
|
||
|
||
/* Figures */
|
||
tt, code, pre {
|
||
background-color: #f9f9f9;
|
||
font-family: var(--font-mono);
|
||
}
|
||
pre {
|
||
border: 1px solid #eee;
|
||
margin: 0;
|
||
padding: 1em;
|
||
}
|
||
img {
|
||
max-width: 100%;
|
||
}
|
||
figure {
|
||
margin: 0;
|
||
}
|
||
figure blockquote {
|
||
margin: 0.8em 0.4em 0.4em;
|
||
}
|
||
figcaption {
|
||
font-style: italic;
|
||
margin: 0 0 1em 0;
|
||
}
|
||
@media screen {
|
||
pre {
|
||
overflow-x: auto;
|
||
max-width: 100%;
|
||
max-width: calc(100% - 22px);
|
||
}
|
||
}
|
||
|
||
/* aside, blockquote */
|
||
aside, blockquote {
|
||
margin-left: 0;
|
||
padding: 1.2em 2em;
|
||
}
|
||
blockquote {
|
||
background-color: #f9f9f9;
|
||
color: #111; /* Arlen: WCAG 2019 */
|
||
border: 1px solid #ddd;
|
||
border-radius: 3px;
|
||
margin: 1em 0;
|
||
}
|
||
blockquote > *:last-child {
|
||
margin-bottom: 0;
|
||
}
|
||
cite {
|
||
display: block;
|
||
text-align: right;
|
||
font-style: italic;
|
||
}
|
||
.xref {
|
||
overflow-wrap: normal;
|
||
}
|
||
|
||
/* tables */
|
||
table {
|
||
width: 100%;
|
||
margin: 0 0 1em;
|
||
border-collapse: collapse;
|
||
border: 1px solid #eee;
|
||
}
|
||
th, td {
|
||
text-align: left;
|
||
vertical-align: top;
|
||
padding: 0.5em 0.75em;
|
||
}
|
||
th {
|
||
text-align: left;
|
||
background-color: #e9e9e9;
|
||
}
|
||
tr:nth-child(2n+1) > td {
|
||
background-color: #f5f5f5;
|
||
}
|
||
table caption {
|
||
font-style: italic;
|
||
margin: 0;
|
||
padding: 0;
|
||
text-align: left;
|
||
}
|
||
table p {
|
||
/* XXX to avoid bottom margin on table row signifiers. If paragraphs should
|
||
be allowed within tables more generally, it would be far better to select on a class. */
|
||
margin: 0;
|
||
}
|
||
|
||
/* pilcrow */
|
||
/* Pilcrow link: hidden until a hover rule reveals it, and excluded from
   text selection. */
a.pilcrow {
  visibility: hidden;
  color: #666; /* Arlen: AHDJ 2019 */
  text-decoration: none;
  /* vendor-prefixed forms first, standard property last */
  -webkit-touch-callout: none;
  -webkit-user-select: none;
  -khtml-user-select: none;
  -moz-user-select: none;
  -o-user-select:none;
  -ms-user-select: none;
  user-select: none;
}
|
||
@media screen {
|
||
aside:hover > a.pilcrow,
|
||
p:hover > a.pilcrow,
|
||
blockquote:hover > a.pilcrow,
|
||
div:hover > a.pilcrow,
|
||
li:hover > a.pilcrow,
|
||
pre:hover > a.pilcrow {
|
||
visibility: visible;
|
||
}
|
||
a.pilcrow:hover {
|
||
background-color: transparent;
|
||
}
|
||
}
|
||
|
||
/* misc */
|
||
hr {
|
||
border: 0;
|
||
border-top: 1px solid #eee;
|
||
}
|
||
.bcp14 {
|
||
font-variant: small-caps;
|
||
}
|
||
|
||
.role {
|
||
font-variant: all-small-caps;
|
||
}
|
||
|
||
/* info block */
|
||
#identifiers {
|
||
margin: 0;
|
||
font-size: 0.9em;
|
||
}
|
||
#identifiers dt {
|
||
width: 3em;
|
||
clear: left;
|
||
}
|
||
#identifiers dd {
|
||
float: left;
|
||
margin-bottom: 0;
|
||
}
|
||
/* Fix PDF info block run off issue */
|
||
@media print {
|
||
#identifiers dd {
|
||
max-width: 100%;
|
||
}
|
||
}
|
||
#identifiers .authors .author {
|
||
display: inline-block;
|
||
margin-right: 1.5em;
|
||
}
|
||
#identifiers .authors .org {
|
||
font-style: italic;
|
||
}
|
||
|
||
/* The prepared/rendered info at the very bottom of the page */
|
||
.docInfo {
|
||
color: #666; /* Arlen: WCAG 2019 */
|
||
font-size: 0.9em;
|
||
font-style: italic;
|
||
margin-top: 2em;
|
||
}
|
||
/* Float the two halves of the document-info footer to opposite sides.
   NOTE(review): the second rule previously repeated `.docInfo .prepared`,
   so `float: right` simply overrode `float: left`. `.rendered` is presumed
   from the "prepared/rendered" wording of the section comment -- confirm
   the class name against the generated markup. */
.docInfo .prepared {
  float: left;
}
.docInfo .rendered {
  float: right;
}
|
||
|
||
/* table of contents */
|
||
#toc {
|
||
padding: 0.75em 0 2em 0;
|
||
margin-bottom: 1em;
|
||
}
|
||
nav.toc ul {
|
||
margin: 0 0.5em 0 0;
|
||
padding: 0;
|
||
list-style: none;
|
||
}
|
||
nav.toc li {
|
||
line-height: 1.3em;
|
||
margin: 0.75em 0;
|
||
padding-left: 1.2em;
|
||
text-indent: -1.2em;
|
||
}
|
||
/* references */
|
||
.references dt {
|
||
text-align: right;
|
||
font-weight: bold;
|
||
min-width: 7em;
|
||
}
|
||
.references dd {
|
||
margin-left: 8em;
|
||
overflow: auto;
|
||
}
|
||
|
||
.refInstance {
|
||
margin-bottom: 1.25em;
|
||
}
|
||
|
||
.refSubseries {
|
||
margin-bottom: 1.25em;
|
||
}
|
||
|
||
.references .ascii {
|
||
margin-bottom: 0.25em;
|
||
}
|
||
|
||
/* index */
|
||
.index ul {
|
||
margin: 0 0 0 1em;
|
||
padding: 0;
|
||
list-style: none;
|
||
}
|
||
.index ul ul {
|
||
margin: 0;
|
||
}
|
||
.index li {
|
||
margin: 0;
|
||
text-indent: -2em;
|
||
padding-left: 2em;
|
||
padding-bottom: 5px;
|
||
}
|
||
.indexIndex {
|
||
margin: 0.5em 0 1em;
|
||
}
|
||
.index a {
|
||
font-weight: 700;
|
||
}
|
||
/* make the index two-column on all but the smallest screens */
|
||
@media (min-width: 600px) {
  /* Top-level index lists flow into two columns. The standard properties
     are declared after the -moz- ones so that browsers which do not
     recognise the Gecko prefix still get the multi-column layout. */
  .index ul {
    -moz-column-count: 2;
    column-count: 2;
    -moz-column-gap: 20px;
    column-gap: 20px;
  }
  /* Nested lists stay single-column inside their parent's column. */
  .index ul ul {
    -moz-column-count: 1;
    column-count: 1;
    -moz-column-gap: 0;
    column-gap: 0;
  }
}
|
||
|
||
/* authors */
|
||
address.vcard {
|
||
font-style: normal;
|
||
margin: 1em 0;
|
||
}
|
||
|
||
address.vcard .nameRole {
|
||
font-weight: 700;
|
||
margin-left: 0;
|
||
}
|
||
address.vcard .label {
|
||
font-family: var(--font-sans);
|
||
margin: 0.5em 0;
|
||
}
|
||
address.vcard .type {
|
||
display: none;
|
||
}
|
||
.alternative-contact {
|
||
margin: 1.5em 0 1em;
|
||
}
|
||
hr.addr {
|
||
border-top: 1px dashed;
|
||
margin: 0;
|
||
color: #ddd;
|
||
max-width: calc(100% - 16px);
|
||
}
|
||
|
||
/* temporary notes */
|
||
.rfcEditorRemove::before {
|
||
position: absolute;
|
||
top: 0.2em;
|
||
right: 0.2em;
|
||
padding: 0.2em;
|
||
content: "The RFC Editor will remove this note";
|
||
color: #9e2a00; /* Arlen: WCAG 2019 */
|
||
background-color: #ffd; /* Arlen: WCAG 2019 */
|
||
}
|
||
.rfcEditorRemove {
|
||
position: relative;
|
||
padding-top: 1.8em;
|
||
background-color: #ffd; /* Arlen: WCAG 2019 */
|
||
border-radius: 3px;
|
||
}
|
||
.cref {
|
||
background-color: #ffd; /* Arlen: WCAG 2019 */
|
||
padding: 2px 4px;
|
||
}
|
||
.crefSource {
|
||
font-style: italic;
|
||
}
|
||
/* alternative layout for smaller screens */
|
||
@media screen and (max-width: 1023px) {
|
||
body {
|
||
padding-top: 2em;
|
||
}
|
||
#title {
|
||
padding: 1em 0;
|
||
}
|
||
h1 {
|
||
font-size: 24px;
|
||
}
|
||
h2 {
|
||
font-size: 20px;
|
||
margin-top: -18px; /* provide offset for in-page anchors */
|
||
padding-top: 38px;
|
||
}
|
||
#identifiers dd {
|
||
max-width: 60%;
|
||
}
|
||
#toc {
|
||
position: fixed;
|
||
z-index: 2;
|
||
top: 0;
|
||
right: 0;
|
||
padding: 0;
|
||
margin: 0;
|
||
background-color: inherit;
|
||
border-bottom: 1px solid #ccc;
|
||
}
|
||
#toc h2 {
|
||
margin: -1px 0 0 0;
|
||
padding: 4px 0 4px 6px;
|
||
padding-right: 1em;
|
||
min-width: 190px;
|
||
font-size: 1.1em;
|
||
text-align: right;
|
||
background-color: #444;
|
||
color: white;
|
||
cursor: pointer;
|
||
}
|
||
#toc h2::before { /* css hamburger */
|
||
float: right;
|
||
position: relative;
|
||
width: 1em;
|
||
height: 1px;
|
||
left: -164px;
|
||
margin: 6px 0 0 0;
|
||
background: white none repeat scroll 0 0;
|
||
box-shadow: 0 4px 0 0 white, 0 8px 0 0 white;
|
||
content: "";
|
||
}
|
||
#toc nav {
|
||
display: none;
|
||
padding: 0.5em 1em 1em;
|
||
overflow: auto;
|
||
height: calc(100vh - 48px);
|
||
border-left: 1px solid #ddd;
|
||
}
|
||
}
|
||
|
||
/* alternative layout for wide screens */
|
||
@media screen and (min-width: 1024px) {
|
||
body {
|
||
max-width: 724px;
|
||
margin: 42px auto;
|
||
padding-left: 1.5em;
|
||
padding-right: 29em;
|
||
}
|
||
#toc {
|
||
position: fixed;
|
||
top: 42px;
|
||
right: 42px;
|
||
width: 25%;
|
||
margin: 0;
|
||
padding: 0 1em;
|
||
z-index: 1;
|
||
}
|
||
#toc h2 {
|
||
border-top: none;
|
||
border-bottom: 1px solid #ddd;
|
||
font-size: 1em;
|
||
font-weight: normal;
|
||
margin: 0;
|
||
padding: 0.25em 1em 1em 0;
|
||
}
|
||
#toc nav {
|
||
display: block;
|
||
height: calc(90vh - 84px);
|
||
bottom: 0;
|
||
padding: 0.5em 0 0;
|
||
overflow: auto;
|
||
}
|
||
img { /* future proofing */
|
||
max-width: 100%;
|
||
height: auto;
|
||
}
|
||
}
|
||
|
||
/* pagination */
|
||
@media print {
|
||
body {
|
||
width: 100%;
|
||
}
|
||
p {
|
||
orphans: 3;
|
||
widows: 3;
|
||
}
|
||
#n-copyright-notice {
|
||
border-bottom: none;
|
||
}
|
||
#toc, #n-introduction {
|
||
page-break-before: always;
|
||
}
|
||
#toc {
|
||
border-top: none;
|
||
padding-top: 0;
|
||
}
|
||
figure, pre {
|
||
page-break-inside: avoid;
|
||
}
|
||
figure {
|
||
overflow: scroll;
|
||
}
|
||
.breakable pre {
|
||
break-inside: auto;
|
||
}
|
||
h1, h2, h3, h4, h5, h6 {
|
||
page-break-after: avoid;
|
||
}
|
||
h2+*, h3+*, h4+*, h5+*, h6+* {
|
||
page-break-before: avoid;
|
||
}
|
||
pre {
|
||
white-space: pre-wrap;
|
||
word-wrap: break-word;
|
||
font-size: 10pt;
|
||
}
|
||
table {
|
||
border: 1px solid #ddd;
|
||
}
|
||
td {
|
||
border-top: 1px solid #ddd;
|
||
}
|
||
}
|
||
|
||
/* This is commented out here, as the string-set: doesn't
|
||
pass W3C validation currently */
|
||
/*
|
||
.ears thead .left {
|
||
string-set: ears-top-left content();
|
||
}
|
||
|
||
.ears thead .center {
|
||
string-set: ears-top-center content();
|
||
}
|
||
|
||
.ears thead .right {
|
||
string-set: ears-top-right content();
|
||
}
|
||
|
||
.ears tfoot .left {
|
||
string-set: ears-bottom-left content();
|
||
}
|
||
|
||
.ears tfoot .center {
|
||
string-set: ears-bottom-center content();
|
||
}
|
||
|
||
.ears tfoot .right {
|
||
string-set: ears-bottom-right content();
|
||
}
|
||
*/
|
||
|
||
@page :first {
|
||
padding-top: 0;
|
||
@top-left {
|
||
content: normal;
|
||
border: none;
|
||
}
|
||
@top-center {
|
||
content: normal;
|
||
border: none;
|
||
}
|
||
@top-right {
|
||
content: normal;
|
||
border: none;
|
||
}
|
||
}
|
||
|
||
@page {
|
||
size: A4;
|
||
margin-bottom: 45mm;
|
||
padding-top: 20px;
|
||
/* The following is commented out here, but set appropriately in code, as
|
||
the content depends on the document */
|
||
/*
|
||
@top-left {
|
||
content: 'Internet-Draft';
|
||
vertical-align: bottom;
|
||
border-bottom: solid 1px #ccc;
|
||
}
|
||
@top-left {
|
||
content: string(ears-top-left);
|
||
vertical-align: bottom;
|
||
border-bottom: solid 1px #ccc;
|
||
}
|
||
@top-center {
|
||
content: string(ears-top-center);
|
||
vertical-align: bottom;
|
||
border-bottom: solid 1px #ccc;
|
||
}
|
||
@top-right {
|
||
content: string(ears-top-right);
|
||
vertical-align: bottom;
|
||
border-bottom: solid 1px #ccc;
|
||
}
|
||
@bottom-left {
|
||
content: string(ears-bottom-left);
|
||
vertical-align: top;
|
||
border-top: solid 1px #ccc;
|
||
}
|
||
@bottom-center {
|
||
content: string(ears-bottom-center);
|
||
vertical-align: top;
|
||
border-top: solid 1px #ccc;
|
||
}
|
||
@bottom-right {
|
||
content: '[Page ' counter(page) ']';
|
||
vertical-align: top;
|
||
border-top: solid 1px #ccc;
|
||
}
|
||
*/
|
||
|
||
}
|
||
|
||
/* Changes introduced to fix issues found during implementation */
|
||
/* Make sure links are clickable even if overlapped by following H* */
|
||
a {
|
||
z-index: 2;
|
||
}
|
||
/* Separate body from document info even without intervening H1 */
|
||
section {
|
||
clear: both;
|
||
}
|
||
|
||
|
||
/* Top align author divs, to avoid names without organization dropping level with org names */
|
||
.author {
|
||
vertical-align: top;
|
||
}
|
||
|
||
/* Leave room in document info to show Internet-Draft on one line */
|
||
#identifiers dt {
|
||
width: 8em;
|
||
}
|
||
|
||
/* Don't waste quite as much whitespace between label and value in doc info */
|
||
#identifiers dd {
|
||
margin-left: 1em;
|
||
}
|
||
|
||
/* Give floating toc a background color (needed when it's a div inside section) */
|
||
#toc {
|
||
background-color: white;
|
||
}
|
||
|
||
/* Make the collapsed ToC header render white on gray also when it's a link */
|
||
@media screen and (max-width: 1023px) {
|
||
#toc h2 a,
|
||
#toc h2 a:link,
|
||
#toc h2 a:focus,
|
||
#toc h2 a:hover,
|
||
#toc a.toplink,
|
||
#toc a.toplink:hover {
|
||
color: white;
|
||
background-color: #444;
|
||
text-decoration: none;
|
||
}
|
||
}
|
||
|
||
/* Give the bottom of the ToC some whitespace */
|
||
@media screen and (min-width: 1024px) {
|
||
#toc {
|
||
padding: 0 0 1em 1em;
|
||
}
|
||
}
|
||
|
||
/* Style section numbers with more space between number and title */
|
||
.section-number {
|
||
padding-right: 0.5em;
|
||
}
|
||
|
||
/* prevent monospace from becoming overly large */
|
||
tt, code, pre {
|
||
font-size: 95%;
|
||
}
|
||
|
||
/* Fix the height/width aspect for ascii art*/
|
||
.sourcecode pre,
|
||
.art-text pre {
|
||
line-height: 1.12;
|
||
}
|
||
|
||
|
||
/* Add styling for a link in the ToC that points to the top of the document */
|
||
a.toplink {
|
||
float: right;
|
||
margin-right: 0.5em;
|
||
}
|
||
|
||
/* Fix the dl styling to match the RFC 7992 attributes */
|
||
dl > dt,
|
||
dl.dlParallel > dt {
|
||
float: left;
|
||
margin-right: 1em;
|
||
}
|
||
dl.dlNewline > dt {
|
||
float: none;
|
||
}
|
||
|
||
/* Provide styling for table cell text alignment */
|
||
table td.text-left,
|
||
table th.text-left {
|
||
text-align: left;
|
||
}
|
||
table td.text-center,
|
||
table th.text-center {
|
||
text-align: center;
|
||
}
|
||
table td.text-right,
|
||
table th.text-right {
|
||
text-align: right;
|
||
}
|
||
|
||
/* Make the alternative author contact information look less like just another
|
||
author, and group it closer with the primary author contact information */
|
||
.alternative-contact {
|
||
margin: 0.5em 0 0.25em 0;
|
||
}
|
||
address .non-ascii {
|
||
margin: 0 0 0 2em;
|
||
}
|
||
|
||
/* With it being possible to set tables with alignment
|
||
left, center, and right, { width: 100%; } does not make sense */
|
||
table {
|
||
width: auto;
|
||
}
|
||
|
||
/* Avoid reference text that sits in a block with very wide left margin,
|
||
because of a long floating dt label.*/
|
||
.references dd {
|
||
overflow: visible;
|
||
}
|
||
|
||
/* Control caption placement */
|
||
caption {
|
||
caption-side: bottom;
|
||
}
|
||
|
||
/* Limit the width of the author address vcard, so names in right-to-left
|
||
script don't end up on the other side of the page. */
|
||
|
||
address.vcard {
|
||
max-width: 30em;
|
||
margin-right: auto;
|
||
}
|
||
|
||
/* For address alignment dependent on LTR or RTL scripts */
|
||
address div.left {
|
||
text-align: left;
|
||
}
|
||
address div.right {
|
||
text-align: right;
|
||
}
|
||
|
||
/* Provide table alignment support. We can't use the alignX classes above
|
||
since they do unwanted things with caption and other styling. */
|
||
table.right {
|
||
margin-left: auto;
|
||
margin-right: 0;
|
||
}
|
||
table.center {
|
||
margin-left: auto;
|
||
margin-right: auto;
|
||
}
|
||
table.left {
|
||
margin-left: 0;
|
||
margin-right: auto;
|
||
}
|
||
|
||
/* Give the table caption label the same styling as the figcaption */
|
||
caption a[href] {
|
||
color: #222;
|
||
}
|
||
|
||
@media print {
|
||
.toplink {
|
||
display: none;
|
||
}
|
||
|
||
/* avoid overwriting the top border line with the ToC header */
|
||
#toc {
|
||
padding-top: 1px;
|
||
}
|
||
|
||
/* Avoid page breaks inside dl and author address entries */
|
||
.vcard {
|
||
page-break-inside: avoid;
|
||
}
|
||
|
||
}
|
||
/* Tweak the bcp14 keyword presentation */
|
||
.bcp14 {
|
||
font-variant: small-caps;
|
||
font-weight: bold;
|
||
font-size: 0.9em;
|
||
}
|
||
/* Tweak the invisible space above H* in order not to overlay links in text above */
|
||
h2 {
|
||
margin-top: -18px; /* provide offset for in-page anchors */
|
||
padding-top: 31px;
|
||
}
|
||
h3 {
|
||
margin-top: -18px; /* provide offset for in-page anchors */
|
||
padding-top: 24px;
|
||
}
|
||
h4 {
|
||
margin-top: -18px; /* provide offset for in-page anchors */
|
||
padding-top: 24px;
|
||
}
|
||
/* Float artwork pilcrow to the right */
|
||
@media screen {
|
||
.artwork a.pilcrow {
|
||
display: block;
|
||
line-height: 0.7;
|
||
margin-top: 0.15em;
|
||
}
|
||
}
|
||
/* Make pilcrows on dd visible */
|
||
@media screen {
|
||
dd:hover > a.pilcrow {
|
||
visibility: visible;
|
||
}
|
||
}
|
||
/* Make the placement of figcaption match that of a table's caption
|
||
by removing the figure's added bottom margin */
|
||
.alignLeft.art-text,
|
||
.alignCenter.art-text,
|
||
.alignRight.art-text {
|
||
margin-bottom: 0;
|
||
}
|
||
.alignLeft,
|
||
.alignCenter,
|
||
.alignRight {
|
||
margin: 1em 0 0 0;
|
||
}
|
||
/* In print, the pilcrow won't show on hover, so prevent it from taking up space,
|
||
possibly even requiring a new line */
|
||
@media print {
|
||
a.pilcrow {
|
||
display: none;
|
||
}
|
||
}
|
||
/* Styling for the external metadata */
|
||
div#external-metadata {
|
||
background-color: #eee;
|
||
padding: 0.5em;
|
||
margin-bottom: 0.5em;
|
||
display: none;
|
||
}
|
||
div#internal-metadata {
|
||
padding: 0.5em; /* to match the external-metadata padding */
|
||
}
|
||
/* Styling for title RFC Number */
|
||
h1#rfcnum {
|
||
clear: both;
|
||
margin: 0 0 -1em;
|
||
padding: 1em 0 0 0;
|
||
}
|
||
/* Make .olPercent look the same as <ol><li> */
|
||
dl.olPercent > dd {
|
||
margin-bottom: 0.25em;
|
||
min-height: initial;
|
||
}
|
||
/* Give aside some styling to set it apart */
|
||
aside {
|
||
border-left: 1px solid #ddd;
|
||
margin: 1em 0 1em 2em;
|
||
padding: 0.2em 2em;
|
||
}
|
||
aside > dl,
|
||
aside > ol,
|
||
aside > ul,
|
||
aside > table,
|
||
aside > p {
|
||
margin-bottom: 0.5em;
|
||
}
|
||
/* Additional page break settings */
|
||
@media print {
|
||
figcaption, table caption {
|
||
page-break-before: avoid;
|
||
}
|
||
}
|
||
/* Font size adjustments for print */
|
||
@media print {
|
||
body { font-size: 10pt; line-height: normal; max-width: 96%; }
|
||
h1 { font-size: 1.72em; padding-top: 1.5em; } /* 1*1.2*1.2*1.2 */
|
||
h2 { font-size: 1.44em; padding-top: 1.5em; } /* 1*1.2*1.2 */
|
||
h3 { font-size: 1.2em; padding-top: 1.5em; } /* 1*1.2 */
|
||
h4 { font-size: 1em; padding-top: 1.5em; }
|
||
h5, h6 { font-size: 1em; margin: initial; padding: 0.5em 0 0.3em; }
|
||
}
|
||
/* Sourcecode margin in print, when there's no pilcrow */
|
||
@media print {
|
||
.artwork,
|
||
.artwork > pre,
|
||
.sourcecode {
|
||
margin-bottom: 1em;
|
||
}
|
||
}
|
||
/* Avoid narrow tables forcing too narrow table captions, which may render badly */
|
||
table {
|
||
min-width: 20em;
|
||
}
|
||
/* ol type a */
|
||
ol.type-a { list-style-type: lower-alpha; }
|
||
ol.type-A { list-style-type: upper-alpha; }
|
||
ol.type-i { list-style-type: lower-roman; }
|
||
ol.type-I { list-style-type: upper-roman; }
|
||
/* Apply the print table and row borders in general, on request from the RPC,
|
||
and increase the contrast between border and odd row background slightly */
|
||
table {
|
||
border: 1px solid #ddd;
|
||
}
|
||
td {
|
||
border-top: 1px solid #ddd;
|
||
}
|
||
tr {
|
||
break-inside: avoid;
|
||
}
|
||
tr:nth-child(2n+1) > td {
|
||
background-color: #f8f8f8;
|
||
}
|
||
/* Use style rules to govern display of the TOC. */
|
||
@media screen and (max-width: 1023px) {
|
||
#toc nav { display: none; }
|
||
#toc.active nav { display: block; }
|
||
}
|
||
/* Add support for keepWithNext */
|
||
.keepWithNext {
  /* Ask paged media not to break the page right after this element. */
  break-after: avoid-page;
}
|
||
/* Add support for keepWithPrevious */
|
||
.keepWithPrevious {
|
||
break-before: avoid-page;
|
||
}
|
||
/* Change the approach to avoiding breaks inside artwork etc. */
|
||
figure, pre, table, .artwork, .sourcecode {
|
||
break-before: auto;
|
||
break-after: auto;
|
||
}
|
||
/* Avoid breaks between <dt> and <dd> */
|
||
dl {
|
||
break-before: auto;
|
||
break-inside: auto;
|
||
}
|
||
dt {
|
||
break-before: auto;
|
||
break-after: avoid-page;
|
||
}
|
||
dd {
|
||
break-before: avoid-page;
|
||
break-after: auto;
|
||
orphans: 3;
|
||
widows: 3
|
||
}
|
||
span.break, dd.break {
|
||
margin-bottom: 0;
|
||
min-height: 0;
|
||
break-before: auto;
|
||
break-inside: auto;
|
||
break-after: auto;
|
||
}
|
||
/* Undo break-before ToC */
|
||
@media print {
|
||
#toc {
|
||
break-before: auto;
|
||
}
|
||
}
|
||
/* Text in compact lists should not get extra bottom margin space,
|
||
since that would make the list not compact */
|
||
ul.compact p, .ulCompact p,
|
||
ol.compact p, .olCompact p {
|
||
margin: 0;
|
||
}
|
||
/* But the list as a whole needs the extra space at the end */
|
||
section ul.compact,
|
||
section .ulCompact,
|
||
section ol.compact,
|
||
section .olCompact {
|
||
margin-bottom: 1em; /* same as p not within ul.compact etc. */
|
||
}
|
||
/* The tt and code background above interferes with for instance table cell
|
||
backgrounds. Changed to something a bit more selective. */
|
||
tt, code {
|
||
background-color: transparent;
|
||
}
|
||
p tt, p code, li tt, li code, dt tt, dt code {
|
||
background-color: #f8f8f8;
|
||
}
|
||
/* Tweak the pre margin -- 0px doesn't come out well */
|
||
pre {
|
||
margin-top: 0.5px;
|
||
}
|
||
/* Tweak the compact list text */
|
||
ul.compact, .ulCompact,
|
||
ol.compact, .olCompact,
|
||
dl.compact, .dlCompact {
|
||
line-height: normal;
|
||
}
|
||
/* Don't add top margin for nested lists */
|
||
li > ul, li > ol, li > dl,
|
||
dd > ul, dd > ol, dd > dl,
|
||
dl > dd > dl {
|
||
margin-top: initial;
|
||
}
|
||
/* Elements that should not be rendered on the same line as a <dt> */
|
||
/* This should match the element list in writer.text.TextWriter.render_dl() */
|
||
dd > div.artwork:first-child,
|
||
dd > aside:first-child,
|
||
dd > blockquote:first-child,
|
||
dd > figure:first-child,
|
||
dd > ol:first-child,
|
||
dd > div.sourcecode:first-child,
|
||
dd > table:first-child,
|
||
dd > ul:first-child {
|
||
clear: left;
|
||
}
|
||
/* fix for weird browser behaviour when <dd/> is empty */
|
||
dt+dd:empty::before{
|
||
content: "\00a0";
|
||
}
|
||
/* Make paragraph spacing inside <li> smaller than in body text, to fit better within the list */
|
||
li > p {
|
||
margin-bottom: 0.5em
|
||
}
|
||
/* Don't let p margin spill out from inside list items */
|
||
li > p:last-of-type:only-child {
|
||
margin-bottom: 0;
|
||
}
|
||
</style>
|
||
<link href="rfc-local.css" rel="stylesheet" type="text/css">
|
||
<!--
  addMetadata (minified), registered as a window "load" listener.
  1. For each rule in document.styleSheets[0] whose selector text matches
     "#identifiers", inserts a copy of the rule with the first "#identifiers"
     in its cssText replaced by "#external-updates".
  2. Looks up the content of the meta element named "rfc.number". When it is
     non-empty, fetches https://www.rfc-editor.org/rfc/rfc{number}.json; if
     that fetch or JSON parse throws, retries with a ".json" URL derived from
     document.URL.
  3. If no metadata was loaded, returns without touching the
     "external-metadata" element. Otherwise it makes that element visible and
     sets its innerHTML to a definition list (id "external-updates") with
     rows for status, obsoletes, obsoleted by, updates, updated by, see also,
     and a "More info" row linking to Datatracker, IPR search and the info
     page (plus an errata link when errata_url is set).
  Errors are caught and written with console.log.
  NOTE(review): values from the fetched JSON are concatenated into markup and
  assigned through innerHTML without escaping; confirm the JSON sources are
  trusted.
-->
<script type="application/javascript">async function addMetadata(){try{const e=document.styleSheets[0].cssRules;for(let t=0;t<e.length;t++)if(/#identifiers/.exec(e[t].selectorText)){const a=e[t].cssText.replace("#identifiers","#external-updates");document.styleSheets[0].insertRule(a,document.styleSheets[0].cssRules.length)}}catch(e){console.log(e)}const e=document.getElementById("external-metadata");if(e)try{var t,a="",o=function(e){const t=document.getElementsByTagName("meta");for(let a=0;a<t.length;a++)if(t[a].getAttribute("name")===e)return t[a].getAttribute("content");return""}("rfc.number");if(o){t="https://www.rfc-editor.org/rfc/rfc"+o+".json";try{const e=await fetch(t);a=await e.json()}catch(e){t=document.URL.indexOf("html")>=0?document.URL.replace(/html$/,"json"):document.URL+".json";const o=await fetch(t);a=await o.json()}}if(!a)return;e.style.display="block";const s="",d="https://datatracker.ietf.org/doc",n="https://datatracker.ietf.org/ipr/search",c="https://www.rfc-editor.org/info",l=a.doc_id.toLowerCase(),i=a.doc_id.slice(0,3).toLowerCase(),f=a.doc_id.slice(3).replace(/^0+/,""),u={status:"Status",obsoletes:"Obsoletes",obsoleted_by:"Obsoleted By",updates:"Updates",updated_by:"Updated By",see_also:"See Also",errata_url:"Errata"};let h="<dl style='overflow:hidden' id='external-updates'>";["status","obsoletes","obsoleted_by","updates","updated_by","see_also","errata_url"].forEach(e=>{if("status"==e){a[e]=a[e].toLowerCase();var t=a[e].split(" "),o=t.length,w="",p=1;for(let e=0;e<o;e++)p<o?w=w+r(t[e])+" ":w+=r(t[e]),p++;a[e]=w}else if("obsoletes"==e||"obsoleted_by"==e||"updates"==e||"updated_by"==e){var g,m="",b=1;g=a[e].length;for(let t=0;t<g;t++)a[e][t]&&(a[e][t]=String(a[e][t]).toLowerCase(),m=b<g?m+"<a href='"+s+"/rfc/".concat(a[e][t])+"'>"+a[e][t].slice(3)+"</a>, ":m+"<a href='"+s+"/rfc/".concat(a[e][t])+"'>"+a[e][t].slice(3)+"</a>",b++);a[e]=m}else if("see_also"==e){var y,L="",C=1;y=a[e].length;for(let 
t=0;t<y;t++)if(a[e][t]){a[e][t]=String(a[e][t]);var _=a[e][t].slice(0,3),v=a[e][t].slice(3).replace(/^0+/,"");L=C<y?"RFC"!=_?L+"<a href='"+s+"/info/"+_.toLowerCase().concat(v.toLowerCase())+"'>"+_+" "+v+"</a>, ":L+"<a href='"+s+"/info/"+_.toLowerCase().concat(v.toLowerCase())+"'>"+v+"</a>, ":"RFC"!=_?L+"<a href='"+s+"/info/"+_.toLowerCase().concat(v.toLowerCase())+"'>"+_+" "+v+"</a>":L+"<a href='"+s+"/info/"+_.toLowerCase().concat(v.toLowerCase())+"'>"+v+"</a>",C++}a[e]=L}else if("errata_url"==e){var R="";R=a[e]?R+"<a href='"+a[e]+"'>Errata exist</a> | <a href='"+d+"/"+l+"'>Datatracker</a>| <a href='"+n+"/?"+i+"="+f+"&submit="+i+"'>IPR</a> | <a href='"+c+"/"+l+"'>Info page</a>":"<a href='"+d+"/"+l+"'>Datatracker</a> | <a href='"+n+"/?"+i+"="+f+"&submit="+i+"'>IPR</a> | <a href='"+c+"/"+l+"'>Info page</a>",a[e]=R}""!=a[e]?"Errata"==u[e]?h+=`<dt>More info:</dt><dd>${a[e]}</dd>`:h+=`<dt>${u[e]}:</dt><dd>${a[e]}</dd>`:"Errata"==u[e]&&(h+=`<dt>More info:</dt><dd>${a[e]}</dd>`)}),h+="</dl>",e.innerHTML=h}catch(e){console.log(e)}else console.log("Could not locate metadata <div> element");function r(e){return e.charAt(0).toUpperCase()+e.slice(1)}}window.removeEventListener("load",addMetadata),window.addEventListener("load",addMetadata);</script>
|
||
</head>
|
||
<body class="xml2rfc">
|
||
<table class="ears">
|
||
<thead><tr>
|
||
<td class="left">Internet-Draft</td>
|
||
<td class="center">Agent Cascade Prevention</td>
|
||
<td class="right">March 2026</td>
|
||
</tr></thead>
|
||
<tfoot><tr>
|
||
<td class="left">Nennemann</td>
|
||
<td class="center">Expires 7 September 2026</td>
|
||
<td class="right">[Page]</td>
|
||
</tr></tfoot>
|
||
</table>
|
||
<div id="external-metadata" class="document-information"></div>
|
||
<div id="internal-metadata" class="document-information">
|
||
<dl id="identifiers">
|
||
<dt class="label-workgroup">Workgroup:</dt>
|
||
<dd class="workgroup">NMOP</dd>
|
||
<dt class="label-internet-draft">Internet-Draft:</dt>
|
||
<dd class="internet-draft">draft-nennemann-agent-cascade-prevention-00</dd>
|
||
<dt class="label-published">Published:</dt>
|
||
<dd class="published">
|
||
<time datetime="2026-03-06" class="published">6 March 2026</time>
|
||
</dd>
|
||
<dt class="label-intended-status">Intended Status:</dt>
|
||
<dd class="intended-status">Standards Track</dd>
|
||
<dt class="label-expires">Expires:</dt>
|
||
<dd class="expires"><time datetime="2026-09-07">7 September 2026</time></dd>
|
||
<dt class="label-authors">Author:</dt>
|
||
<dd class="authors">
|
||
<div class="author">
|
||
<div class="author-name">C. Nennemann</div>
|
||
<div class="org">Independent Researcher</div>
|
||
</div>
|
||
</dd>
|
||
</dl>
|
||
</div>
|
||
<h1 id="title">Agent Failure Cascade Prevention and Rollback</h1>
|
||
<section id="section-abstract">
|
||
<h2 id="abstract"><a href="#abstract" class="selfRef">Abstract</a></h2>
|
||
<p id="section-abstract-1">This document defines protocols for preventing agent failures from
|
||
cascading across interconnected autonomous systems and standardized
|
||
mechanisms for real-time rollback of incorrect agent decisions. It
|
||
specifies a circuit breaker protocol with well-defined state
|
||
transitions, failure domain isolation through bulkhead patterns, cascade
|
||
detection via error rate and latency analysis, and a distributed
|
||
rollback coordination protocol that walks the Execution Context Token
|
||
(ECT) DAG backwards to revert agent actions to a known-good state.
|
||
This document absorbs and supersedes the concepts introduced in earlier
|
||
AERR and ATD proposals.<a href="#section-abstract-1" class="pilcrow">¶</a></p>
|
||
</section>
|
||
<div id="status-of-memo">
|
||
<section id="section-boilerplate.1">
|
||
<h2 id="name-status-of-this-memo">
|
||
<a href="#name-status-of-this-memo" class="section-name selfRef">Status of This Memo</a>
|
||
</h2>
|
||
<p id="section-boilerplate.1-1">
|
||
This Internet-Draft is submitted in full conformance with the
|
||
provisions of BCP 78 and BCP 79.<a href="#section-boilerplate.1-1" class="pilcrow">¶</a></p>
|
||
<p id="section-boilerplate.1-2">
|
||
Internet-Drafts are working documents of the Internet Engineering Task
|
||
Force (IETF). Note that other groups may also distribute working
|
||
documents as Internet-Drafts. The list of current Internet-Drafts is
|
||
at <span><a href="https://datatracker.ietf.org/drafts/current/">https://datatracker.ietf.org/drafts/current/</a></span>.<a href="#section-boilerplate.1-2" class="pilcrow">¶</a></p>
|
||
<p id="section-boilerplate.1-3">
|
||
Internet-Drafts are draft documents valid for a maximum of six months
|
||
and may be updated, replaced, or obsoleted by other documents at any
|
||
time. It is inappropriate to use Internet-Drafts as reference
|
||
material or to cite them other than as "work in progress."<a href="#section-boilerplate.1-3" class="pilcrow">¶</a></p>
|
||
<p id="section-boilerplate.1-4">
|
||
This Internet-Draft will expire on 7 September 2026.<a href="#section-boilerplate.1-4" class="pilcrow">¶</a></p>
|
||
</section>
|
||
</div>
|
||
<div id="copyright">
|
||
<section id="section-boilerplate.2">
|
||
<h2 id="name-copyright-notice">
|
||
<a href="#name-copyright-notice" class="section-name selfRef">Copyright Notice</a>
|
||
</h2>
|
||
<p id="section-boilerplate.2-1">
|
||
Copyright (c) 2026 IETF Trust and the persons identified as the
|
||
document authors. All rights reserved.<a href="#section-boilerplate.2-1" class="pilcrow">¶</a></p>
|
||
<p id="section-boilerplate.2-2">
|
||
This document is subject to BCP 78 and the IETF Trust's Legal
|
||
Provisions Relating to IETF Documents
|
||
(<span><a href="https://trustee.ietf.org/license-info">https://trustee.ietf.org/license-info</a></span>) in effect on the date of
|
||
publication of this document. Please review these documents
|
||
carefully, as they describe your rights and restrictions with
|
||
respect to this document. Code Components extracted from this
|
||
document must include Revised BSD License text as described in
|
||
Section 4.e of the Trust Legal Provisions and are provided without
|
||
warranty as described in the Revised BSD License.<a href="#section-boilerplate.2-2" class="pilcrow">¶</a></p>
|
||
</section>
|
||
</div>
|
||
<div id="toc">
|
||
<section id="section-toc.1">
|
||
<!-- Back-to-top link: the empty fragment ("#") already targets the top of the document, so no inline script handler is needed; aria-label gives the arrow glyph an accessible name. -->
<a href="#" class="toplink" aria-label="Back to top">▲</a><h2 id="name-table-of-contents">
|
||
<a href="#name-table-of-contents" class="section-name selfRef">Table of Contents</a>
|
||
</h2>
|
||
<nav class="toc"><ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.1">
|
||
<p id="section-toc.1-1.1.1" class="keepWithNext"><a href="#section-1" class="auto internal xref">1</a>. <a href="#name-introduction" class="internal xref">Introduction</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.2">
|
||
<p id="section-toc.1-1.2.1" class="keepWithNext"><a href="#section-2" class="auto internal xref">2</a>. <a href="#name-terminology" class="internal xref">Terminology</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3">
|
||
<p id="section-toc.1-1.3.1"><a href="#section-3" class="auto internal xref">3</a>. <a href="#name-failure-cascade-prevention" class="internal xref">Failure Cascade Prevention</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.1">
|
||
<p id="section-toc.1-1.3.2.1.1"><a href="#section-3.1" class="auto internal xref">3.1</a>. <a href="#name-cascade-model" class="internal xref">Cascade Model</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.1.2.1">
|
||
<p id="section-toc.1-1.3.2.1.2.1.1" class="keepWithNext"><a href="#section-3.1.1" class="auto internal xref">3.1.1</a>. <a href="#name-failure-domain-taxonomy" class="internal xref">Failure Domain Taxonomy</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.1.2.2">
|
||
<p id="section-toc.1-1.3.2.1.2.2.1"><a href="#section-3.1.2" class="auto internal xref">3.1.2</a>. <a href="#name-propagation-vectors-in-agen" class="internal xref">Propagation Vectors in Agent Ecosystems</a></p>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.2">
|
||
<p id="section-toc.1-1.3.2.2.1"><a href="#section-3.2" class="auto internal xref">3.2</a>. <a href="#name-circuit-breaker-protocol" class="internal xref">Circuit Breaker Protocol</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.2.2.1">
|
||
<p id="section-toc.1-1.3.2.2.2.1.1"><a href="#section-3.2.1" class="auto internal xref">3.2.1</a>. <a href="#name-states" class="internal xref">States</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.2.2.2">
|
||
<p id="section-toc.1-1.3.2.2.2.2.1"><a href="#section-3.2.2" class="auto internal xref">3.2.2</a>. <a href="#name-state-transition-rules" class="internal xref">State Transition Rules</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.2.2.3">
|
||
<p id="section-toc.1-1.3.2.2.2.3.1"><a href="#section-3.2.3" class="auto internal xref">3.2.3</a>. <a href="#name-circuit-breaker-registratio" class="internal xref">Circuit Breaker Registration and Discovery</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.2.2.4">
|
||
<p id="section-toc.1-1.3.2.2.2.4.1"><a href="#section-3.2.4" class="auto internal xref">3.2.4</a>. <a href="#name-ect-integration" class="internal xref">ECT Integration</a></p>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.3">
|
||
<p id="section-toc.1-1.3.2.3.1"><a href="#section-3.3" class="auto internal xref">3.3</a>. <a href="#name-failure-domain-isolation" class="internal xref">Failure Domain Isolation</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.3.2.1">
|
||
<p id="section-toc.1-1.3.2.3.2.1.1"><a href="#section-3.3.1" class="auto internal xref">3.3.1</a>. <a href="#name-blast-radius-containment-st" class="internal xref">Blast Radius Containment Strategies</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.3.2.2">
|
||
<p id="section-toc.1-1.3.2.3.2.2.1"><a href="#section-3.3.2" class="auto internal xref">3.3.2</a>. <a href="#name-domain-boundary-enforcement" class="internal xref">Domain Boundary Enforcement</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.3.2.3">
|
||
<p id="section-toc.1-1.3.2.3.2.3.1"><a href="#section-3.3.3" class="auto internal xref">3.3.3</a>. <a href="#name-bulkhead-patterns-for-agent" class="internal xref">Bulkhead Patterns for Agent Pools</a></p>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.4">
|
||
<p id="section-toc.1-1.3.2.4.1"><a href="#section-3.4" class="auto internal xref">3.4</a>. <a href="#name-cascade-detection" class="internal xref">Cascade Detection</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.4.2.1">
|
||
<p id="section-toc.1-1.3.2.4.2.1.1"><a href="#section-3.4.1" class="auto internal xref">3.4.1</a>. <a href="#name-detection-signals" class="internal xref">Detection Signals</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.4.2.2">
|
||
<p id="section-toc.1-1.3.2.4.2.2.1"><a href="#section-3.4.2" class="auto internal xref">3.4.2</a>. <a href="#name-propagation-tracking-via-ec" class="internal xref">Propagation Tracking via ECT DAG Analysis</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.3.2.4.2.3">
|
||
<p id="section-toc.1-1.3.2.4.2.3.1"><a href="#section-3.4.3" class="auto internal xref">3.4.3</a>. <a href="#name-alert-format-and-escalation" class="internal xref">Alert Format and Escalation</a></p>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4">
|
||
<p id="section-toc.1-1.4.1"><a href="#section-4" class="auto internal xref">4</a>. <a href="#name-real-time-rollback" class="internal xref">Real-Time Rollback</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.1">
|
||
<p id="section-toc.1-1.4.2.1.1"><a href="#section-4.1" class="auto internal xref">4.1</a>. <a href="#name-rollback-model" class="internal xref">Rollback Model</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.1.2.1">
|
||
<p id="section-toc.1-1.4.2.1.2.1.1"><a href="#section-4.1.1" class="auto internal xref">4.1.1</a>. <a href="#name-walking-the-ect-dag-backwar" class="internal xref">Walking the ECT DAG Backwards</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.1.2.2">
|
||
<p id="section-toc.1-1.4.2.1.2.2.1"><a href="#section-4.1.2" class="auto internal xref">4.1.2</a>. <a href="#name-compensating-actions-vs-sta" class="internal xref">Compensating Actions vs State Restoration</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.1.2.3">
|
||
<p id="section-toc.1-1.4.2.1.2.3.1"><a href="#section-4.1.3" class="auto internal xref">4.1.3</a>. <a href="#name-rollback-scope" class="internal xref">Rollback Scope</a></p>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.2">
|
||
<p id="section-toc.1-1.4.2.2.1"><a href="#section-4.2" class="auto internal xref">4.2</a>. <a href="#name-checkpoint-protocol" class="internal xref">Checkpoint Protocol</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.2.2.1">
|
||
<p id="section-toc.1-1.4.2.2.2.1.1"><a href="#section-4.2.1" class="auto internal xref">4.2.1</a>. <a href="#name-checkpoint-creation" class="internal xref">Checkpoint Creation</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.2.2.2">
|
||
<p id="section-toc.1-1.4.2.2.2.2.1"><a href="#section-4.2.2" class="auto internal xref">4.2.2</a>. <a href="#name-checkpoint-storage-and-retr" class="internal xref">Checkpoint Storage and Retrieval</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.2.2.3">
|
||
<p id="section-toc.1-1.4.2.2.2.3.1"><a href="#section-4.2.3" class="auto internal xref">4.2.3</a>. <a href="#name-checkpoint-verification" class="internal xref">Checkpoint Verification</a></p>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.3">
|
||
<p id="section-toc.1-1.4.2.3.1"><a href="#section-4.3" class="auto internal xref">4.3</a>. <a href="#name-distributed-rollback-coordi" class="internal xref">Distributed Rollback Coordination</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.3.2.1">
|
||
<p id="section-toc.1-1.4.2.3.2.1.1"><a href="#section-4.3.1" class="auto internal xref">4.3.1</a>. <a href="#name-rollback-coordinator-role" class="internal xref">Rollback Coordinator Role</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.3.2.2">
|
||
<p id="section-toc.1-1.4.2.3.2.2.1"><a href="#section-4.3.2" class="auto internal xref">4.3.2</a>. <a href="#name-two-phase-rollback-protocol" class="internal xref">Two-Phase Rollback Protocol</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.3.2.3">
|
||
<p id="section-toc.1-1.4.2.3.2.3.1"><a href="#section-4.3.3" class="auto internal xref">4.3.3</a>. <a href="#name-partial-rollback-handling" class="internal xref">Partial Rollback Handling</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.3.2.4">
|
||
<p id="section-toc.1-1.4.2.3.2.4.1"><a href="#section-4.3.4" class="auto internal xref">4.3.4</a>. <a href="#name-conflict-resolution-during-" class="internal xref">Conflict Resolution During Concurrent Rollbacks</a></p>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.4">
|
||
<p id="section-toc.1-1.4.2.4.1"><a href="#section-4.4" class="auto internal xref">4.4</a>. <a href="#name-rollback-evidence" class="internal xref">Rollback Evidence</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.4.2.1">
|
||
<p id="section-toc.1-1.4.2.4.2.1.1"><a href="#section-4.4.1" class="auto internal xref">4.4.1</a>. <a href="#name-ect-nodes-for-rollback-acti" class="internal xref">ECT Nodes for Rollback Actions</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.4.2.4.2.2">
|
||
<p id="section-toc.1-1.4.2.4.2.2.1"><a href="#section-4.4.2" class="auto internal xref">4.4.2</a>. <a href="#name-rollback-audit-trail" class="internal xref">Rollback Audit Trail</a></p>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.5">
|
||
<p id="section-toc.1-1.5.1"><a href="#section-5" class="auto internal xref">5</a>. <a href="#name-ect-integration-2" class="internal xref">ECT Integration</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.6">
|
||
<p id="section-toc.1-1.6.1"><a href="#section-6" class="auto internal xref">6</a>. <a href="#name-security-considerations" class="internal xref">Security Considerations</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.6.2.1">
|
||
<p id="section-toc.1-1.6.2.1.1"><a href="#section-6.1" class="auto internal xref">6.1</a>. <a href="#name-rollback-weaponization" class="internal xref">Rollback Weaponization</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.6.2.2">
|
||
<p id="section-toc.1-1.6.2.2.1"><a href="#section-6.2" class="auto internal xref">6.2</a>. <a href="#name-circuit-breaker-manipulatio" class="internal xref">Circuit Breaker Manipulation</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.6.2.3">
|
||
<p id="section-toc.1-1.6.2.3.1"><a href="#section-6.3" class="auto internal xref">6.3</a>. <a href="#name-checkpoint-integrity" class="internal xref">Checkpoint Integrity</a></p>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.7">
|
||
<p id="section-toc.1-1.7.1"><a href="#section-7" class="auto internal xref">7</a>. <a href="#name-iana-considerations" class="internal xref">IANA Considerations</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.7.2.1">
|
||
<p id="section-toc.1-1.7.2.1.1"><a href="#section-7.1" class="auto internal xref">7.1</a>. <a href="#name-registration-of-exec_act-va" class="internal xref">Registration of exec_act Values</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.7.2.2">
|
||
<p id="section-toc.1-1.7.2.2.1"><a href="#section-7.2" class="auto internal xref">7.2</a>. <a href="#name-registration-of-ext-claims" class="internal xref">Registration of ext Claims</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.7.2.3">
|
||
<p id="section-toc.1-1.7.2.3.1"><a href="#section-7.3" class="auto internal xref">7.3</a>. <a href="#name-well-known-uri-registration" class="internal xref">Well-Known URI Registration</a></p>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.8">
|
||
<p id="section-toc.1-1.8.1"><a href="#section-8" class="auto internal xref">8</a>. <a href="#name-references" class="internal xref">References</a></p>
|
||
<ul class="compact toc ulBare ulEmpty">
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.8.2.1">
|
||
<p id="section-toc.1-1.8.2.1.1"><a href="#section-8.1" class="auto internal xref">8.1</a>. <a href="#name-normative-references" class="internal xref">Normative References</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.8.2.2">
|
||
<p id="section-toc.1-1.8.2.2.1"><a href="#section-8.2" class="auto internal xref">8.2</a>. <a href="#name-informative-references" class="internal xref">Informative References</a></p>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.9">
|
||
<p id="section-toc.1-1.9.1"><a href="#appendix-A" class="auto internal xref"></a><a href="#name-acknowledgments" class="internal xref">Acknowledgments</a></p>
|
||
</li>
|
||
<li class="compact toc ulBare ulEmpty" id="section-toc.1-1.10">
|
||
<p id="section-toc.1-1.10.1"><a href="#appendix-B" class="auto internal xref"></a><a href="#name-authors-address" class="internal xref">Author's Address</a></p>
|
||
</li>
|
||
</ul>
|
||
</nav>
|
||
</section>
|
||
</div>
|
||
<div id="introduction">
|
||
<section id="section-1">
|
||
<h2 id="name-introduction">
|
||
<a href="#section-1" class="section-number selfRef">1. </a><a href="#name-introduction" class="section-name selfRef">Introduction</a>
|
||
</h2>
|
||
<p id="section-1-1">Autonomous AI agents increasingly operate in interconnected
|
||
multi-agent systems where a single agent's failure can propagate
|
||
through the network, causing widespread service disruption. The IETF
|
||
gap analysis <span>[<a href="#I-D.nennemann-agent-gap-analysis" class="cite xref">I-D.nennemann-agent-gap-analysis</a>]</span> identified two
|
||
critical gaps in existing standards:<a href="#section-1-1" class="pilcrow">¶</a></p>
|
||
<ul class="normal">
|
||
<li class="normal" id="section-1-2.1">
|
||
<p id="section-1-2.1.1"><strong>Gap 2 (Cascade Prevention)</strong>: No standard mechanism exists for
|
||
containing failures within agent ecosystems. When one agent fails,
|
||
dependent agents continue sending requests to the failing agent,
|
||
amplifying the failure across the system.<a href="#section-1-2.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li class="normal" id="section-1-2.2">
|
||
<p id="section-1-2.2.1"><strong>Gap 4 (Rollback)</strong>: No standard protocol exists for reverting
|
||
incorrect agent decisions. When an autonomous agent misconfigures
|
||
a network device or makes an erroneous API call, there is no
|
||
interoperable way to undo the action or coordinate rollback across
|
||
multiple affected agents.<a href="#section-1-2.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ul>
|
||
<p id="section-1-3">This document addresses both gaps by defining:<a href="#section-1-3" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-1-4">
|
||
<li id="section-1-4.1">
|
||
<p id="section-1-4.1.1">A circuit breaker protocol that stops failure propagation between
|
||
agents.<a href="#section-1-4.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-1-4.2">
|
||
<p id="section-1-4.2.1">Failure domain isolation mechanisms that contain blast radius.<a href="#section-1-4.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-1-4.3">
|
||
<p id="section-1-4.3.1">Cascade detection signals that identify propagating failures early.<a href="#section-1-4.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-1-4.4">
|
||
<p id="section-1-4.4.1">A distributed rollback protocol that coordinates state reversion
|
||
across multiple agents using the ECT DAG
|
||
<span>[<a href="#I-D.nennemann-wimse-ect" class="cite xref">I-D.nennemann-wimse-ect</a>]</span>.<a href="#section-1-4.4.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
<p id="section-1-5">This specification absorbs and supersedes the concepts from the earlier
|
||
Agent Error Recovery and Rollback (AERR) and Agent Task DAG (ATD)
|
||
proposals, consolidating cascade prevention and rollback into a single
|
||
coherent protocol built on ECT infrastructure.<a href="#section-1-5" class="pilcrow">¶</a></p>
|
||
<p id="section-1-6">Design principles:<a href="#section-1-6" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-1-7">
|
||
<li id="section-1-7.1">
|
||
<p id="section-1-7.1.1">Agents that take consequential actions <span class="bcp14">MUST</span> be able to undo them,
|
||
or <span class="bcp14">MUST</span> declare them irreversible upfront.<a href="#section-1-7.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-1-7.2">
|
||
<p id="section-1-7.2.1">Failure containment takes priority over failure diagnosis.<a href="#section-1-7.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-1-7.3">
|
||
<p id="section-1-7.3.1">The protocol adds minimal overhead to the happy path.<a href="#section-1-7.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-1-7.4">
|
||
<p id="section-1-7.4.1">All cascade prevention and rollback actions are recorded as ECT
|
||
nodes, providing a cryptographic audit trail.<a href="#section-1-7.4.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
</div>
|
||
<div id="terminology">
|
||
<section id="section-2">
|
||
<h2 id="name-terminology">
|
||
<a href="#section-2" class="section-number selfRef">2. </a><a href="#name-terminology" class="section-name selfRef">Terminology</a>
|
||
</h2>
|
||
<p id="section-2-1">The key words "<span class="bcp14">MUST</span>", "<span class="bcp14">MUST NOT</span>", "<span class="bcp14">REQUIRED</span>", "<span class="bcp14">SHALL</span>", "<span class="bcp14">SHALL NOT</span>", "<span class="bcp14">SHOULD</span>", "<span class="bcp14">SHOULD NOT</span>", "<span class="bcp14">RECOMMENDED</span>", "<span class="bcp14">NOT RECOMMENDED</span>",
|
||
"<span class="bcp14">MAY</span>", and "<span class="bcp14">OPTIONAL</span>" in this document are to be interpreted as
|
||
described in BCP 14 <span>[<a href="#RFC2119" class="cite xref">RFC2119</a>]</span> <span>[<a href="#RFC8174" class="cite xref">RFC8174</a>]</span> when, and only when, they
|
||
appear in all capitals, as shown here.<a href="#section-2-1" class="pilcrow">¶</a></p>
|
||
<span class="break"></span><dl class="dlParallel" id="section-2-2">
|
||
<dt id="section-2-2.1">Circuit Breaker:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-2-2.2">
|
||
<p id="section-2-2.2.1">A mechanism that stops an agent from propagating requests to a
|
||
failing downstream agent, preventing cascading failures. Modeled
|
||
after the electrical circuit breaker pattern used in microservice
|
||
architectures.<a href="#section-2-2.2.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-2-2.3">Failure Domain:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-2-2.4">
|
||
<p id="section-2-2.4.1">A bounded set of agents and resources within which a failure is
|
||
contained. Failures within a domain <span class="bcp14">MUST NOT</span> propagate beyond the
|
||
domain boundary without explicit escalation.<a href="#section-2-2.4.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-2-2.5">Blast Radius:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-2-2.6">
|
||
<p id="section-2-2.6.1">The set of agents and systems affected by a single agent's failure,
|
||
determinable by traversing the ECT DAG forward from the failing
|
||
node.<a href="#section-2-2.6.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-2-2.7">Cascade Detection:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-2-2.8">
|
||
<p id="section-2-2.8.1">The process of identifying that a failure is propagating across
|
||
agent boundaries, using signals such as error rate spikes, latency
|
||
increases, and resource exhaustion patterns.<a href="#section-2-2.8.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-2-2.9">Rollback Coordinator:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-2-2.10">
|
||
<p id="section-2-2.10.1">An agent or orchestrator responsible for coordinating distributed
|
||
rollback across multiple agents in a workflow, ensuring consistency
|
||
and resolving conflicts.<a href="#section-2-2.10.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-2-2.11">Checkpoint:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-2-2.12">
|
||
<p id="section-2-2.12.1">An ECT node recording an agent's state hash before a consequential
|
||
action, providing a restore point for rollback.<a href="#section-2-2.12.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-2-2.13">Compensating Action:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-2-2.14">
|
||
<p id="section-2-2.14.1">An action that semantically reverses the effect of a prior action
|
||
when direct state restoration is not possible (e.g., deleting a
|
||
resource that was created, rather than restoring a pre-creation
|
||
snapshot).<a href="#section-2-2.14.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-2-2.15">Recovery Point:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-2-2.16">
|
||
<p id="section-2-2.16.1">The most recent checkpoint in the ECT DAG to which an agent or
|
||
workflow can be safely rolled back without violating consistency
|
||
constraints.<a href="#section-2-2.16.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
</dl>
|
||
</section>
|
||
</div>
|
||
<div id="failure-cascade-prevention">
|
||
<section id="section-3">
|
||
<h2 id="name-failure-cascade-prevention">
|
||
<a href="#section-3" class="section-number selfRef">3. </a><a href="#name-failure-cascade-prevention" class="section-name selfRef">Failure Cascade Prevention</a>
|
||
</h2>
|
||
<div id="cascade-model">
|
||
<section id="section-3.1">
|
||
<h3 id="name-cascade-model">
|
||
<a href="#section-3.1" class="section-number selfRef">3.1. </a><a href="#name-cascade-model" class="section-name selfRef">Cascade Model</a>
|
||
</h3>
|
||
<p id="section-3.1-1">When an agent fails in a multi-agent system, the failure can
|
||
propagate through multiple vectors. The following diagram
|
||
illustrates a typical cascade scenario:<a href="#section-3.1-1" class="pilcrow">¶</a></p>
|
||
<span id="name-failure-cascade-propagation"></span><div id="fig-cascade">
|
||
<figure id="figure-1">
|
||
<div class="alignLeft art-text artwork" id="section-3.1-2.1">
|
||
<pre>
|
||
Agent A Agent B Agent C Agent D
|
||
| | | |
|
||
| request | | |
|
||
|--------------->| | |
|
||
| | request | |
|
||
| |--------------->| |
|
||
| | | request |
|
||
| | |--------------->|
|
||
| | | |
|
||
| | | FAILURE |
|
||
| | |<--- X ---------|
|
||
| | | |
|
||
| | error/timeout | |
|
||
| |<---------------| |
|
||
| | | |
|
||
| error/timeout | | |
|
||
|<---------------| | |
|
||
| | | |
|
||
| [CASCADE: all agents impacted by D's failure] |
|
||
| | | |
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-1" class="selfRef">Figure 1</a>:
|
||
<a href="#name-failure-cascade-propagation" class="selfRef">Failure Cascade Propagation</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
<div id="failure-domain-taxonomy">
|
||
<section id="section-3.1.1">
|
||
<h4 id="name-failure-domain-taxonomy">
|
||
<a href="#section-3.1.1" class="section-number selfRef">3.1.1. </a><a href="#name-failure-domain-taxonomy" class="section-name selfRef">Failure Domain Taxonomy</a>
|
||
</h4>
|
||
<p id="section-3.1.1-1">Failures in agent ecosystems fall into the following categories:<a href="#section-3.1.1-1" class="pilcrow">¶</a></p>
|
||
<span class="break"></span><dl class="dlParallel" id="section-3.1.1-2">
|
||
<dt id="section-3.1.1-2.1">Agent-Local Failure:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-3.1.1-2.2">
|
||
<p id="section-3.1.1-2.2.1">A failure confined to a single agent instance (e.g., out-of-memory,
|
||
logic error). The blast radius is limited to the agent itself and
|
||
its immediate callers.<a href="#section-3.1.1-2.2.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-3.1.1-2.3">Service Failure:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-3.1.1-2.4">
|
||
<p id="section-3.1.1-2.4.1">A failure affecting all instances of a particular agent service
|
||
(e.g., model endpoint unavailable). The blast radius includes all
|
||
agents that depend on the failing service.<a href="#section-3.1.1-2.4.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-3.1.1-2.5">Infrastructure Failure:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-3.1.1-2.6">
|
||
<p id="section-3.1.1-2.6.1">A failure in shared infrastructure (e.g., network partition,
|
||
certificate authority unavailable). The blast radius may span
|
||
multiple failure domains.<a href="#section-3.1.1-2.6.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-3.1.1-2.7">Semantic Failure:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-3.1.1-2.8">
|
||
<p id="section-3.1.1-2.8.1">An agent produces incorrect output without raising an error (e.g.,
|
||
misconfiguration, wrong decision). This is the hardest category
|
||
to detect and may propagate silently through the DAG.<a href="#section-3.1.1-2.8.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
</dl>
|
||
</section>
|
||
</div>
|
||
<div id="propagation-vectors-in-agent-ecosystems">
|
||
<section id="section-3.1.2">
|
||
<h4 id="name-propagation-vectors-in-agen">
|
||
<a href="#section-3.1.2" class="section-number selfRef">3.1.2. </a><a href="#name-propagation-vectors-in-agen" class="section-name selfRef">Propagation Vectors in Agent Ecosystems</a>
|
||
</h4>
|
||
<p id="section-3.1.2-1">Failures propagate through the following vectors:<a href="#section-3.1.2-1" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-3.1.2-2">
|
||
<li id="section-3.1.2-2.1">
|
||
<p id="section-3.1.2-2.1.1"><strong>Synchronous request chains</strong>: An agent blocks waiting for a
|
||
failing downstream agent, causing its own callers to time out.<a href="#section-3.1.2-2.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.1.2-2.2">
|
||
<p id="section-3.1.2-2.2.1"><strong>Shared state corruption</strong>: An agent writes incorrect data to a
|
||
shared store, causing other agents reading that data to fail or
|
||
make incorrect decisions.<a href="#section-3.1.2-2.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.1.2-2.3">
|
||
<p id="section-3.1.2-2.3.1"><strong>Resource exhaustion</strong>: A failing agent consumes excessive
|
||
resources (connections, memory, compute), starving healthy agents.<a href="#section-3.1.2-2.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.1.2-2.4">
|
||
<p id="section-3.1.2-2.4.1"><strong>Retry amplification</strong>: Multiple agents retry requests to a
|
||
failing agent simultaneously, overwhelming it further.<a href="#section-3.1.2-2.4.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="circuit-breaker-protocol">
|
||
<section id="section-3.2">
|
||
<h3 id="name-circuit-breaker-protocol">
|
||
<a href="#section-3.2" class="section-number selfRef">3.2. </a><a href="#name-circuit-breaker-protocol" class="section-name selfRef">Circuit Breaker Protocol</a>
|
||
</h3>
|
||
<p id="section-3.2-1">Each agent <span class="bcp14">MUST</span> implement a circuit breaker for every downstream
|
||
agent it communicates with.<a href="#section-3.2-1" class="pilcrow">¶</a></p>
|
||
<div id="states">
|
||
<section id="section-3.2.1">
|
||
<h4 id="name-states">
|
||
<a href="#section-3.2.1" class="section-number selfRef">3.2.1. </a><a href="#name-states" class="section-name selfRef">States</a>
|
||
</h4>
|
||
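<p id="section-3.2.1-1">The circuit breaker has three states (CLOSED, OPEN, and HALF_OPEN); CLOSED is listed twice below to separate normal operation from the recovery transition:<a href="#section-3.2.1-1" class="pilcrow">¶</a></p>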
|
||
<span class="break"></span><dl class="dlParallel" id="section-3.2.1-2">
|
||
<dt id="section-3.2.1-2.1">CLOSED (normal):</dt>
|
||
<dd style="margin-left: 1.5em" id="section-3.2.1-2.2">
|
||
<p id="section-3.2.1-2.2.1">Requests flow through normally. The agent tracks the error rate
|
||
over a sliding window (default: 60 seconds).<a href="#section-3.2.1-2.2.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-3.2.1-2.3">OPEN (failure detected):</dt>
|
||
<dd style="margin-left: 1.5em" id="section-3.2.1-2.4">
|
||
<p id="section-3.2.1-2.4.1">When the error rate exceeds the configured threshold (default: 50%
|
||
over the window), the breaker opens. All requests to the
|
||
downstream agent are immediately rejected locally. The agent
|
||
<span class="bcp14">MUST</span> emit an ECT with <code>exec_act</code> value <code>"circuit_breaker_open"</code>.<a href="#section-3.2.1-2.4.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-3.2.1-2.5">HALF_OPEN (recovery probe):</dt>
|
||
<dd style="margin-left: 1.5em" id="section-3.2.1-2.6">
|
||
<p id="section-3.2.1-2.6.1">After a cooldown period (default: 30 seconds), the breaker
|
||
transitions to HALF_OPEN and allows a single probe request. If
|
||
the probe succeeds, the breaker returns to CLOSED. If the probe
|
||
fails, the breaker returns to OPEN with a doubled cooldown
|
||
(exponential backoff, maximum 300 seconds).<a href="#section-3.2.1-2.6.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-3.2.1-2.7">CLOSED (recovered):</dt>
|
||
<dd style="margin-left: 1.5em" id="section-3.2.1-2.8">
|
||
<p id="section-3.2.1-2.8.1">When a probe succeeds in the HALF_OPEN state, the breaker returns
|
||
to CLOSED and the agent <span class="bcp14">MUST</span> emit an ECT with <code>exec_act</code> value
|
||
<code>"circuit_breaker_close"</code>.<a href="#section-3.2.1-2.8.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
</dl>
|
||
</section>
|
||
</div>
|
||
<div id="state-transition-rules">
|
||
<section id="section-3.2.2">
|
||
<h4 id="name-state-transition-rules">
|
||
<a href="#section-3.2.2" class="section-number selfRef">3.2.2. </a><a href="#name-state-transition-rules" class="section-name selfRef">State Transition Rules</a>
|
||
</h4>
|
||
<span id="name-circuit-breaker-state-machi"></span><div id="fig-circuit-fsm">
|
||
<figure id="figure-2">
|
||
<div class="alignLeft art-text artwork" id="section-3.2.2-1.1">
|
||
<pre>
|
||
error_rate > threshold
|
||
CLOSED ────────────────────────────────► OPEN
|
||
▲ │
|
||
│ probe succeeds │ cooldown expires
|
||
│ ▼
|
||
└──────────────────────────────── HALF_OPEN
|
||
│
|
||
probe fails │
|
||
▼
|
||
OPEN
|
||
(cooldown *= 2,
|
||
max 300s)
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-2" class="selfRef">Figure 2</a>:
|
||
<a href="#name-circuit-breaker-state-machi" class="selfRef">Circuit Breaker State Machine</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
<p id="section-3.2.2-2">The following rules govern state transitions:<a href="#section-3.2.2-2" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-3.2.2-3">
|
||
<li id="section-3.2.2-3.1">
|
||
<p id="section-3.2.2-3.1.1">CLOSED to OPEN: The error rate over the sliding window exceeds
|
||
the configured threshold. The agent <span class="bcp14">MUST</span> emit a
|
||
<code>"circuit_breaker_open"</code> ECT and reject all subsequent requests
|
||
to the downstream agent.<a href="#section-3.2.2-3.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.2.2-3.2">
|
||
<p id="section-3.2.2-3.2.1">OPEN to HALF_OPEN: The cooldown timer expires. The agent <span class="bcp14">MUST</span>
|
||
allow exactly one probe request through.<a href="#section-3.2.2-3.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.2.2-3.3">
|
||
<p id="section-3.2.2-3.3.1">HALF_OPEN to CLOSED: The probe request succeeds. The agent <span class="bcp14">MUST</span>
|
||
emit a <code>"circuit_breaker_close"</code> ECT and resume normal operation.
|
||
The error rate counters <span class="bcp14">MUST</span> be reset.<a href="#section-3.2.2-3.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.2.2-3.4">
|
||
<p id="section-3.2.2-3.4.1">HALF_OPEN to OPEN: The probe request fails. The cooldown period
|
||
<span class="bcp14">MUST</span> be doubled (up to a maximum of 300 seconds).<a href="#section-3.2.2-3.4.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
</div>
|
||
<div id="circuit-breaker-registration-and-discovery">
|
||
<section id="section-3.2.3">
|
||
<h4 id="name-circuit-breaker-registratio">
|
||
<a href="#section-3.2.3" class="section-number selfRef">3.2.3. </a><a href="#name-circuit-breaker-registratio" class="section-name selfRef">Circuit Breaker Registration and Discovery</a>
|
||
</h4>
|
||
<p id="section-3.2.3-1">Agents <span class="bcp14">MUST</span> expose circuit breaker state at a well-known endpoint:<a href="#section-3.2.3-1" class="pilcrow">¶</a></p>
|
||
<div class="alignLeft art-text artwork" id="section-3.2.3-2">
|
||
<pre>
|
||
GET /.well-known/cascade/circuits HTTP/1.1
|
||
</pre><a href="#section-3.2.3-2" class="pilcrow">¶</a>
|
||
</div>
|
||
<p id="section-3.2.3-3">Response:<a href="#section-3.2.3-3" class="pilcrow">¶</a></p>
|
||
<span id="name-circuit-breaker-status-endp"></span><div id="fig-circuits">
|
||
<figure id="figure-3">
|
||
<div class="lang-json sourcecode" id="section-3.2.3-4.1">
|
||
<pre>
|
||
{
|
||
"circuits": [
|
||
{
|
||
"downstream_agent": "spiffe://example.com/agent/router-mgr",
|
||
"state": "open",
|
||
"error_rate": 0.75,
|
||
"window_s": 60,
|
||
"last_failure_ect": "550e8400-e29b-41d4-a716-446655440099",
|
||
"cooldown_remaining_s": 22
|
||
}
|
||
]
|
||
}
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-3" class="selfRef">Figure 3</a>:
|
||
<a href="#name-circuit-breaker-status-endp" class="selfRef">Circuit Breaker Status Endpoint</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="ect-integration">
|
||
<section id="section-3.2.4">
|
||
<h4 id="name-ect-integration">
|
||
<a href="#section-3.2.4" class="section-number selfRef">3.2.4. </a><a href="#name-ect-integration" class="section-name selfRef">ECT Integration</a>
|
||
</h4>
|
||
<p id="section-3.2.4-1">Each circuit breaker state change <span class="bcp14">MUST</span> produce an ECT node:<a href="#section-3.2.4-1" class="pilcrow">¶</a></p>
|
||
<span id="name-circuit-breaker-open-ect"></span><div id="fig-cb-ect">
|
||
<figure id="figure-4">
|
||
<div class="lang-json sourcecode" id="section-3.2.4-2.1">
|
||
<pre>
|
||
{
|
||
"jti": "cb-open-uuid",
|
||
"exec_act": "circuit_breaker_open",
|
||
"par": ["error-ect-uuid"],
|
||
"ext": {
|
||
"cascade.downstream_agent":
|
||
"spiffe://example.com/agent/router-mgr",
|
||
"cascade.error_rate": 0.75,
|
||
"cascade.window_s": 60,
|
||
"cascade.cooldown_s": 30
|
||
}
|
||
}
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-4" class="selfRef">Figure 4</a>:
|
||
<a href="#name-circuit-breaker-open-ect" class="selfRef">Circuit Breaker Open ECT</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
<span id="name-circuit-breaker-close-ect"></span><div id="fig-cb-close-ect">
|
||
<figure id="figure-5">
|
||
<div class="lang-json sourcecode" id="section-3.2.4-3.1">
|
||
<pre>
|
||
{
|
||
"jti": "cb-close-uuid",
|
||
"exec_act": "circuit_breaker_close",
|
||
"par": ["cb-open-uuid"],
|
||
"ext": {
|
||
"cascade.downstream_agent":
|
||
"spiffe://example.com/agent/router-mgr",
|
||
"cascade.total_cooldown_s": 30
|
||
}
|
||
}
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-5" class="selfRef">Figure 5</a>:
|
||
<a href="#name-circuit-breaker-close-ect" class="selfRef">Circuit Breaker Close ECT</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="failure-domain-isolation">
|
||
<section id="section-3.3">
|
||
<h3 id="name-failure-domain-isolation">
|
||
<a href="#section-3.3" class="section-number selfRef">3.3. </a><a href="#name-failure-domain-isolation" class="section-name selfRef">Failure Domain Isolation</a>
|
||
</h3>
|
||
<div id="blast-radius-containment-strategies">
|
||
<section id="section-3.3.1">
|
||
<h4 id="name-blast-radius-containment-st">
|
||
<a href="#section-3.3.1" class="section-number selfRef">3.3.1. </a><a href="#name-blast-radius-containment-st" class="section-name selfRef">Blast Radius Containment Strategies</a>
|
||
</h4>
|
||
<p id="section-3.3.1-1">Agents <span class="bcp14">MUST</span> implement the following containment strategies:<a href="#section-3.3.1-1" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-3.3.1-2">
|
||
<li id="section-3.3.1-2.1">
|
||
<p id="section-3.3.1-2.1.1"><strong>Request rejection at the boundary</strong>: When a circuit breaker
|
||
opens, the agent <span class="bcp14">MUST</span> return a structured error to its callers
|
||
indicating that the downstream dependency is unavailable, rather
|
||
than propagating the failure.<a href="#section-3.3.1-2.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.3.1-2.2">
|
||
<p id="section-3.3.1-2.2.1"><strong>Timeout enforcement</strong>: Agents <span class="bcp14">MUST</span> enforce timeouts on all
|
||
downstream requests. The timeout <span class="bcp14">MUST</span> be shorter than the
|
||
caller's timeout to prevent timeout cascades.<a href="#section-3.3.1-2.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.3.1-2.3">
|
||
<p id="section-3.3.1-2.3.1"><strong>Graceful degradation</strong>: When a non-critical downstream agent
|
||
is unavailable, agents <span class="bcp14">SHOULD</span> continue operating with reduced
|
||
functionality rather than failing entirely.<a href="#section-3.3.1-2.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
</div>
|
||
<div id="domain-boundary-enforcement">
|
||
<section id="section-3.3.2">
|
||
<h4 id="name-domain-boundary-enforcement">
|
||
<a href="#section-3.3.2" class="section-number selfRef">3.3.2. </a><a href="#name-domain-boundary-enforcement" class="section-name selfRef">Domain Boundary Enforcement</a>
|
||
</h4>
|
||
<p id="section-3.3.2-1">Failure domains are defined by the workflow topology in the ECT DAG.
|
||
Each workflow (identified by the <code>wid</code> claim) constitutes a failure
|
||
domain. Cross-workflow failures <span class="bcp14">MUST</span> be escalated through the HITL
|
||
mechanism <span>[<a href="#I-D.nennemann-agent-dag-hitl-safety" class="cite xref">I-D.nennemann-agent-dag-hitl-safety</a>]</span> rather than
|
||
propagating automatically.<a href="#section-3.3.2-1" class="pilcrow">¶</a></p>
|
||
<p id="section-3.3.2-2">Agents at domain boundaries <span class="bcp14">MUST</span>:<a href="#section-3.3.2-2" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-3.3.2-3">
|
||
<li id="section-3.3.2-3.1">
|
||
<p id="section-3.3.2-3.1.1">Validate all incoming requests against the circuit breaker state
|
||
of their downstream dependencies before accepting work.<a href="#section-3.3.2-3.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.3.2-3.2">
|
||
<p id="section-3.3.2-3.2.1">Emit a <code>"circuit_breaker_open"</code> ECT when rejecting work due to
|
||
downstream unavailability.<a href="#section-3.3.2-3.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.3.2-3.3">
|
||
<p id="section-3.3.2-3.3.1">Report domain health status via the circuits endpoint.<a href="#section-3.3.2-3.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
</div>
|
||
<div id="bulkhead-patterns-for-agent-pools">
|
||
<section id="section-3.3.3">
|
||
<h4 id="name-bulkhead-patterns-for-agent">
|
||
<a href="#section-3.3.3" class="section-number selfRef">3.3.3. </a><a href="#name-bulkhead-patterns-for-agent" class="section-name selfRef">Bulkhead Patterns for Agent Pools</a>
|
||
</h4>
|
||
<p id="section-3.3.3-1">When multiple workflows share a common agent pool, the pool <span class="bcp14">MUST</span>
|
||
implement bulkhead isolation:<a href="#section-3.3.3-1" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-3.3.3-2">
|
||
<li id="section-3.3.3-2.1">
|
||
<p id="section-3.3.3-2.1.1"><strong>Connection limits</strong>: Each workflow <span class="bcp14">MUST</span> have a maximum number
|
||
of concurrent connections to the shared agent pool.<a href="#section-3.3.3-2.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.3.3-2.2">
|
||
<p id="section-3.3.3-2.2.1"><strong>Queue isolation</strong>: Each workflow's requests <span class="bcp14">MUST</span> be queued
|
||
independently, preventing one workflow's backlog from blocking
|
||
others.<a href="#section-3.3.3-2.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.3.3-2.3">
|
||
<p id="section-3.3.3-2.3.1"><strong>Resource quotas</strong>: Shared agent pools <span class="bcp14">SHOULD</span> enforce per-workflow
|
||
resource quotas (CPU, memory, request rate).<a href="#section-3.3.3-2.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="cascade-detection">
|
||
<section id="section-3.4">
|
||
<h3 id="name-cascade-detection">
|
||
<a href="#section-3.4" class="section-number selfRef">3.4. </a><a href="#name-cascade-detection" class="section-name selfRef">Cascade Detection</a>
|
||
</h3>
|
||
<div id="detection-signals">
|
||
<section id="section-3.4.1">
|
||
<h4 id="name-detection-signals">
|
||
<a href="#section-3.4.1" class="section-number selfRef">3.4.1. </a><a href="#name-detection-signals" class="section-name selfRef">Detection Signals</a>
|
||
</h4>
|
||
<p id="section-3.4.1-1">Agents <span class="bcp14">MUST</span> monitor the following signals for cascade detection:<a href="#section-3.4.1-1" class="pilcrow">¶</a></p>
|
||
<span class="break"></span><dl class="dlParallel" id="section-3.4.1-2">
|
||
<dt id="section-3.4.1-2.1">Error Rate:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-3.4.1-2.2">
|
||
<p id="section-3.4.1-2.2.1">The ratio of failed requests to total requests over a sliding
|
||
window. An error rate exceeding the circuit breaker threshold
|
||
indicates a potential cascade.<a href="#section-3.4.1-2.2.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-3.4.1-2.3">Latency Spike:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-3.4.1-2.4">
|
||
<p id="section-3.4.1-2.4.1">A sudden increase in response latency (e.g., p99 latency exceeding
|
||
3x the baseline) indicates downstream congestion or failure.
|
||
Agents <span class="bcp14">SHOULD</span> track latency baselines using exponentially weighted
|
||
moving averages.<a href="#section-3.4.1-2.4.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-3.4.1-2.5">Resource Exhaustion:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-3.4.1-2.6">
|
||
<p id="section-3.4.1-2.6.1">Thread pool saturation, connection pool exhaustion, or memory
|
||
pressure above configured thresholds indicates that a cascade is
|
||
consuming resources.<a href="#section-3.4.1-2.6.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
</dl>
|
||
</section>
|
||
</div>
|
||
<div id="propagation-tracking-via-ect-dag-analysis">
|
||
<section id="section-3.4.2">
|
||
<h4 id="name-propagation-tracking-via-ec">
|
||
<a href="#section-3.4.2" class="section-number selfRef">3.4.2. </a><a href="#name-propagation-tracking-via-ec" class="section-name selfRef">Propagation Tracking via ECT DAG Analysis</a>
|
||
</h4>
|
||
<p id="section-3.4.2-1">Orchestrators <span class="bcp14">SHOULD</span> analyze the ECT DAG to detect cascading
|
||
patterns:<a href="#section-3.4.2-1" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-3.4.2-2">
|
||
<li id="section-3.4.2-2.1">
|
||
<p id="section-3.4.2-2.1.1"><strong>Error clustering</strong>: Multiple <code>"circuit_breaker_open"</code> ECTs
|
||
referencing the same downstream agent within a short window
|
||
indicate a shared dependency failure.<a href="#section-3.4.2-2.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.4.2-2.2">
|
||
<p id="section-3.4.2-2.2.1"><strong>Depth-first propagation</strong>: Errors propagating along <code>par</code>
|
||
chains in the DAG indicate a synchronous cascade.<a href="#section-3.4.2-2.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-3.4.2-2.3">
|
||
<p id="section-3.4.2-2.3.1"><strong>Breadth-first propagation</strong>: Multiple sibling nodes in the
|
||
DAG failing concurrently indicate a shared infrastructure
|
||
failure.<a href="#section-3.4.2-2.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
</div>
|
||
<div id="alert-format-and-escalation">
|
||
<section id="section-3.4.3">
|
||
<h4 id="name-alert-format-and-escalation">
|
||
<a href="#section-3.4.3" class="section-number selfRef">3.4.3. </a><a href="#name-alert-format-and-escalation" class="section-name selfRef">Alert Format and Escalation</a>
|
||
</h4>
|
||
<p id="section-3.4.3-1">When cascade detection identifies a propagating failure, the
|
||
detecting agent <span class="bcp14">MUST</span> emit a cascade alert ECT:<a href="#section-3.4.3-1" class="pilcrow">¶</a></p>
|
||
<span id="name-cascade-alert-ect"></span><div id="fig-cascade-alert">
|
||
<figure id="figure-6">
|
||
<div class="lang-json sourcecode" id="section-3.4.3-2.1">
|
||
<pre>
|
||
{
|
||
"exec_act": "cascade_detected",
|
||
"ext": {
|
||
"cascade.pattern": "depth_first",
|
||
"cascade.affected_agents": 4,
|
||
"cascade.root_cause_ect": "error-ect-uuid",
|
||
"cascade.blast_radius": [
|
||
"spiffe://example.com/agent/a",
|
||
"spiffe://example.com/agent/b",
|
||
"spiffe://example.com/agent/c"
|
||
]
|
||
}
|
||
}
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-6" class="selfRef">Figure 6</a>:
|
||
<a href="#name-cascade-alert-ect" class="selfRef">Cascade Alert ECT</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
<p id="section-3.4.3-3">Cascade alerts with more than 3 affected agents <span class="bcp14">SHOULD</span> trigger
|
||
HITL escalation per <span>[<a href="#I-D.nennemann-agent-dag-hitl-safety" class="cite xref">I-D.nennemann-agent-dag-hitl-safety</a>]</span>.<a href="#section-3.4.3-3" class="pilcrow">¶</a></p>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="real-time-rollback">
|
||
<section id="section-4">
|
||
<h2 id="name-real-time-rollback">
|
||
<a href="#section-4" class="section-number selfRef">4. </a><a href="#name-real-time-rollback" class="section-name selfRef">Real-Time Rollback</a>
|
||
</h2>
|
||
<div id="rollback-model">
|
||
<section id="section-4.1">
|
||
<h3 id="name-rollback-model">
|
||
<a href="#section-4.1" class="section-number selfRef">4.1. </a><a href="#name-rollback-model" class="section-name selfRef">Rollback Model</a>
|
||
</h3>
|
||
<p id="section-4.1-1">Rollback reverses the effects of agent actions by walking the ECT
|
||
DAG backwards from the point of failure to the nearest valid
|
||
recovery point.<a href="#section-4.1-1" class="pilcrow">¶</a></p>
|
||
<div id="walking-the-ect-dag-backwards">
|
||
<section id="section-4.1.1">
|
||
<h4 id="name-walking-the-ect-dag-backwar">
|
||
<a href="#section-4.1.1" class="section-number selfRef">4.1.1. </a><a href="#name-walking-the-ect-dag-backwar" class="section-name selfRef">Walking the ECT DAG Backwards</a>
|
||
</h4>
|
||
<p id="section-4.1.1-1">The rollback process follows <code>par</code> references in reverse:<a href="#section-4.1.1-1" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-4.1.1-2">
|
||
<li id="section-4.1.1-2.1">
|
||
<p id="section-4.1.1-2.1.1">Identify the failing ECT node.<a href="#section-4.1.1-2.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.1.1-2.2">
|
||
<p id="section-4.1.1-2.2.1">Find the checkpoint ECT associated with the failing action
|
||
(referenced via <code>par</code>).<a href="#section-4.1.1-2.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.1.1-2.3">
|
||
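<p id="section-4.1.1-2.3.1">Traverse <code>par</code> references in the reverse direction (from the checkpoint to every ECT that names it, directly or transitively, as a parent) to identify all downstream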
|
||
actions that were caused by the checkpointed action.<a href="#section-4.1.1-2.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.1.1-2.4">
|
||
<p id="section-4.1.1-2.4.1">Issue rollback requests to each affected agent in reverse
|
||
topological order.<a href="#section-4.1.1-2.4.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
<span id="name-rollback-order-via-dag-trav"></span><div id="fig-rollback-order">
|
||
<figure id="figure-7">
|
||
<div class="alignLeft art-text artwork" id="section-4.1.1-3.1">
|
||
<pre>
|
||
Checkpoint A ──► Action A1 ──► Checkpoint B ──► Action B1
|
||
│
|
||
└──► Action B2
|
||
|
||
Rollback order: B2, B1, B, A1, A (reverse topological)
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-7" class="selfRef">Figure 7</a>:
|
||
<a href="#name-rollback-order-via-dag-trav" class="selfRef">Rollback Order via DAG Traversal</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="compensating-actions-vs-state-restoration">
|
||
<section id="section-4.1.2">
|
||
<h4 id="name-compensating-actions-vs-sta">
|
||
<a href="#section-4.1.2" class="section-number selfRef">4.1.2. </a><a href="#name-compensating-actions-vs-sta" class="section-name selfRef">Compensating Actions vs State Restoration</a>
|
||
</h4>
|
||
<p id="section-4.1.2-1">Rollback can be performed through two mechanisms:<a href="#section-4.1.2-1" class="pilcrow">¶</a></p>
|
||
<span class="break"></span><dl class="dlParallel" id="section-4.1.2-2">
|
||
<dt id="section-4.1.2-2.1">State Restoration:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-4.1.2-2.2">
|
||
<p id="section-4.1.2-2.2.1">The agent restores its state from the checkpoint snapshot. This
|
||
is the preferred mechanism when the checkpoint contains a complete
|
||
state snapshot (verified via <code>out_hash</code>).<a href="#section-4.1.2-2.2.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-4.1.2-2.3">Compensating Action:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-4.1.2-2.4">
|
||
<p id="section-4.1.2-2.4.1">When state restoration is not possible (e.g., the action involved
|
||
an external API call), the agent executes a compensating action
|
||
that semantically reverses the original action. Compensating
|
||
actions <span class="bcp14">MUST</span> be recorded as ECT nodes with <code>exec_act</code> value
|
||
<code>"compensate"</code>.<a href="#section-4.1.2-2.4.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
</dl>
|
||
</section>
|
||
</div>
|
||
<div id="rollback-scope">
|
||
<section id="section-4.1.3">
|
||
<h4 id="name-rollback-scope">
|
||
<a href="#section-4.1.3" class="section-number selfRef">4.1.3. </a><a href="#name-rollback-scope" class="section-name selfRef">Rollback Scope</a>
|
||
</h4>
|
||
<p id="section-4.1.3-1">Rollback can be scoped to three levels:<a href="#section-4.1.3-1" class="pilcrow">¶</a></p>
|
||
<span class="break"></span><dl class="dlParallel" id="section-4.1.3-2">
|
||
<dt id="section-4.1.3-2.1">Single Agent:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-4.1.3-2.2">
|
||
<p id="section-4.1.3-2.2.1">Only the specified agent's checkpoint is rolled back. No
|
||
downstream propagation occurs.<a href="#section-4.1.3-2.2.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-4.1.3-2.3">Sub-DAG:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-4.1.3-2.4">
|
||
<p id="section-4.1.3-2.4.1">The checkpoint and all downstream checkpoints in the sub-DAG
|
||
are rolled back. This is the default when <code>cascade</code> is <code>true</code>.<a href="#section-4.1.3-2.4.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
<dt id="section-4.1.3-2.5">Full Workflow:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-4.1.3-2.6">
|
||
<p id="section-4.1.3-2.6.1">All checkpoints in the workflow are rolled back and the workflow
|
||
is terminated. This requires Rollback Coordinator authorization.<a href="#section-4.1.3-2.6.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
</dl>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="checkpoint-protocol">
|
||
<section id="section-4.2">
|
||
<h3 id="name-checkpoint-protocol">
|
||
<a href="#section-4.2" class="section-number selfRef">4.2. </a><a href="#name-checkpoint-protocol" class="section-name selfRef">Checkpoint Protocol</a>
|
||
</h3>
|
||
<div id="checkpoint-creation">
|
||
<section id="section-4.2.1">
|
||
<h4 id="name-checkpoint-creation">
|
||
<a href="#section-4.2.1" class="section-number selfRef">4.2.1. </a><a href="#name-checkpoint-creation" class="section-name selfRef">Checkpoint Creation</a>
|
||
</h4>
|
||
<p id="section-4.2.1-1">An agent <span class="bcp14">MUST</span> create a checkpoint ECT before any consequential
|
||
action. An action is consequential if it modifies external state
|
||
(network configuration, database records, API calls with side
|
||
effects).<a href="#section-4.2.1-1" class="pilcrow">¶</a></p>
|
||
<p id="section-4.2.1-2">A checkpoint is an ECT with:<a href="#section-4.2.1-2" class="pilcrow">¶</a></p>
|
||
<ul class="normal">
|
||
<li class="normal" id="section-4.2.1-3.1">
|
||
<p id="section-4.2.1-3.1.1"><code>exec_act</code>: <code>"checkpoint"</code><a href="#section-4.2.1-3.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li class="normal" id="section-4.2.1-3.2">
|
||
<p id="section-4.2.1-3.2.1"><code>par</code>: the ECT of the action being checkpointed<a href="#section-4.2.1-3.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li class="normal" id="section-4.2.1-3.3">
|
||
<p id="section-4.2.1-3.3.1"><code>out_hash</code>: SHA-256 hash of the agent's state snapshot<a href="#section-4.2.1-3.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ul>
|
||
<span id="name-checkpoint-ect"></span><div id="fig-checkpoint">
|
||
<figure id="figure-8">
|
||
<div class="lang-json sourcecode" id="section-4.2.1-4.1">
|
||
<pre>
|
||
{
|
||
"jti": "ckpt-uuid",
|
||
"exec_act": "checkpoint",
|
||
"par": ["action-ect-uuid"],
|
||
"out_hash": "sha256:...",
|
||
"ext": {
|
||
"cascade.reversible": true,
|
||
"cascade.rollback_uri":
|
||
"https://agent-b.example.com/.well-known/cascade/rollback",
|
||
"cascade.target": "router-07.example.com",
|
||
"cascade.description": "Update BGP peer configuration",
|
||
"cascade.ttl": 86400
|
||
}
|
||
}
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-8" class="selfRef">Figure 8</a>:
|
||
<a href="#name-checkpoint-ect" class="selfRef">Checkpoint ECT</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
<p id="section-4.2.1-5">The <code>cascade.reversible</code> field <span class="bcp14">MUST</span> be present. If <code>false</code>, the
|
||
agent declares that this action cannot be automatically undone and
|
||
rollback requests <span class="bcp14">MUST</span> be escalated to a human operator via the
|
||
HITL mechanism <span>[<a href="#I-D.nennemann-agent-dag-hitl-safety" class="cite xref">I-D.nennemann-agent-dag-hitl-safety</a>]</span>.<a href="#section-4.2.1-5" class="pilcrow">¶</a></p>
|
||
</section>
|
||
</div>
|
||
<div id="checkpoint-storage-and-retrieval">
|
||
<section id="section-4.2.2">
|
||
<h4 id="name-checkpoint-storage-and-retr">
|
||
<a href="#section-4.2.2" class="section-number selfRef">4.2.2. </a><a href="#name-checkpoint-storage-and-retr" class="section-name selfRef">Checkpoint Storage and Retrieval</a>
|
||
</h4>
|
||
<p id="section-4.2.2-1">Checkpoint ECTs <span class="bcp14">MUST</span> be stored for at least the duration specified
|
||
by <code>cascade.ttl</code>. Agents <span class="bcp14">MUST</span> store checkpoints in durable storage
|
||
that survives agent restarts.<a href="#section-4.2.2-1" class="pilcrow">¶</a></p>
|
||
<p id="section-4.2.2-2">Agents <span class="bcp14">MUST</span> expose a checkpoint retrieval endpoint:<a href="#section-4.2.2-2" class="pilcrow">¶</a></p>
|
||
<div class="alignLeft art-text artwork" id="section-4.2.2-3">
|
||
<pre>
|
||
GET /.well-known/cascade/checkpoints/{jti} HTTP/1.1
|
||
</pre><a href="#section-4.2.2-3" class="pilcrow">¶</a>
|
||
</div>
|
||
<p id="section-4.2.2-4">The response <span class="bcp14">MUST</span> include the checkpoint ECT and its verification
|
||
status (whether <code>out_hash</code> matches the current stored state snapshot).<a href="#section-4.2.2-4" class="pilcrow">¶</a></p>
|
||
</section>
|
||
</div>
|
||
<div id="checkpoint-verification">
|
||
<section id="section-4.2.3">
|
||
<h4 id="name-checkpoint-verification">
|
||
<a href="#section-4.2.3" class="section-number selfRef">4.2.3. </a><a href="#name-checkpoint-verification" class="section-name selfRef">Checkpoint Verification</a>
|
||
</h4>
|
||
<p id="section-4.2.3-1">Before executing a rollback, the agent <span class="bcp14">MUST</span> verify the checkpoint
|
||
integrity:<a href="#section-4.2.3-1" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-4.2.3-2">
|
||
<li id="section-4.2.3-2.1">
|
||
<p id="section-4.2.3-2.1.1">Retrieve the checkpoint ECT.<a href="#section-4.2.3-2.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.2.3-2.2">
|
||
<p id="section-4.2.3-2.2.1">Verify the ECT signature chain (L2/L3).<a href="#section-4.2.3-2.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.2.3-2.3">
|
||
<p id="section-4.2.3-2.3.1">Verify that the stored state snapshot matches <code>out_hash</code>.<a href="#section-4.2.3-2.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.2.3-2.4">
|
||
<p id="section-4.2.3-2.4.1">Verify that the checkpoint has not expired (<code>cascade.ttl</code>).<a href="#section-4.2.3-2.4.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
<p id="section-4.2.3-3">If verification fails, the agent <span class="bcp14">MUST</span> reject the rollback request
|
||
and emit an error ECT.<a href="#section-4.2.3-3" class="pilcrow">¶</a></p>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="distributed-rollback-coordination">
|
||
<section id="section-4.3">
|
||
<h3 id="name-distributed-rollback-coordi">
|
||
<a href="#section-4.3" class="section-number selfRef">4.3. </a><a href="#name-distributed-rollback-coordi" class="section-name selfRef">Distributed Rollback Coordination</a>
|
||
</h3>
|
||
<div id="rollback-coordinator-role">
|
||
<section id="section-4.3.1">
|
||
<h4 id="name-rollback-coordinator-role">
|
||
<a href="#section-4.3.1" class="section-number selfRef">4.3.1. </a><a href="#name-rollback-coordinator-role" class="section-name selfRef">Rollback Coordinator Role</a>
|
||
</h4>
|
||
<p id="section-4.3.1-1">For rollbacks spanning multiple agents (sub-DAG or full workflow
|
||
scope), a Rollback Coordinator <span class="bcp14">MUST</span> be designated. The coordinator
|
||
is typically the orchestrator or the agent that initiated the
|
||
workflow.<a href="#section-4.3.1-1" class="pilcrow">¶</a></p>
|
||
<p id="section-4.3.1-2">The coordinator is responsible for:<a href="#section-4.3.1-2" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-4.3.1-3">
|
||
<li id="section-4.3.1-3.1">
|
||
<p id="section-4.3.1-3.1.1">Computing the blast radius by traversing the ECT DAG.<a href="#section-4.3.1-3.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.3.1-3.2">
|
||
<p id="section-4.3.1-3.2.1">Determining rollback order (reverse topological sort).<a href="#section-4.3.1-3.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.3.1-3.3">
|
||
<p id="section-4.3.1-3.3.1">Issuing rollback requests to each affected agent.<a href="#section-4.3.1-3.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.3.1-3.4">
|
||
<p id="section-4.3.1-3.4.1">Tracking rollback progress and handling failures.<a href="#section-4.3.1-3.4.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.3.1-3.5">
|
||
<p id="section-4.3.1-3.5.1">Emitting the final rollback completion ECT.<a href="#section-4.3.1-3.5.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
</div>
|
||
<div id="two-phase-rollback-protocol">
|
||
<section id="section-4.3.2">
|
||
<h4 id="name-two-phase-rollback-protocol">
|
||
<a href="#section-4.3.2" class="section-number selfRef">4.3.2. </a><a href="#name-two-phase-rollback-protocol" class="section-name selfRef">Two-Phase Rollback Protocol</a>
|
||
</h4>
|
||
<p id="section-4.3.2-1">Distributed rollback follows a two-phase protocol:<a href="#section-4.3.2-1" class="pilcrow">¶</a></p>
|
||
<p id="section-4.3.2-2"><strong>Phase 1: Prepare</strong><a href="#section-4.3.2-2" class="pilcrow">¶</a></p>
|
||
<p id="section-4.3.2-3">The coordinator sends a prepare request to each affected agent:<a href="#section-4.3.2-3" class="pilcrow">¶</a></p>
|
||
<span id="name-rollback-prepare-request"></span><div id="fig-prepare">
|
||
<figure id="figure-9">
|
||
<div class="alignLeft art-text artwork" id="section-4.3.2-4.1">
|
||
<pre>
|
||
POST /.well-known/cascade/rollback/prepare HTTP/1.1
|
||
Content-Type: application/json
|
||
Execution-Context: &lt;prepare-ect&gt;
|
||
|
||
{
|
||
"rollback_id": "urn:uuid:...",
|
||
"checkpoint_id": "ckpt-uuid",
|
||
"scope": "sub_dag"
|
||
}
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-9" class="selfRef">Figure 9</a>:
|
||
<a href="#name-rollback-prepare-request" class="selfRef">Rollback Prepare Request</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
<p id="section-4.3.2-5">Each agent <span class="bcp14">MUST</span> respond with either:<a href="#section-4.3.2-5" class="pilcrow">¶</a></p>
|
||
<ul class="normal">
|
||
<li class="normal" id="section-4.3.2-6.1">
|
||
<p id="section-4.3.2-6.1.1"><code>"prepared"</code>: The agent has verified its checkpoint and is ready
|
||
to roll back.<a href="#section-4.3.2-6.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li class="normal" id="section-4.3.2-6.2">
|
||
<p id="section-4.3.2-6.2.1"><code>"cannot_prepare"</code>: The agent cannot roll back (e.g., checkpoint
|
||
expired, irreversible action).<a href="#section-4.3.2-6.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ul>
|
||
<p id="section-4.3.2-7"><strong>Phase 2: Execute</strong><a href="#section-4.3.2-7" class="pilcrow">¶</a></p>
|
||
<p id="section-4.3.2-8">If all agents respond <code>"prepared"</code>, the coordinator sends execute
|
||
requests in reverse topological order:<a href="#section-4.3.2-8" class="pilcrow">¶</a></p>
|
||
<span id="name-rollback-execute-request"></span><div id="fig-execute">
|
||
<figure id="figure-10">
|
||
<div class="alignLeft art-text artwork" id="section-4.3.2-9.1">
|
||
<pre>
|
||
POST /.well-known/cascade/rollback HTTP/1.1
|
||
Content-Type: application/json
|
||
Execution-Context: &lt;rollback-ect&gt;
|
||
|
||
{
|
||
"rollback_id": "urn:uuid:...",
|
||
"checkpoint_id": "ckpt-uuid",
|
||
"phase": "execute"
|
||
}
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-10" class="selfRef">Figure 10</a>:
|
||
<a href="#name-rollback-execute-request" class="selfRef">Rollback Execute Request</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
<p id="section-4.3.2-10">If any agent responds <code>"cannot_prepare"</code> in Phase 1, the
|
||
coordinator <span class="bcp14">MUST</span> either:<a href="#section-4.3.2-10" class="pilcrow">¶</a></p>
|
||
<ul class="normal">
|
||
<li class="normal" id="section-4.3.2-11.1">
|
||
<p id="section-4.3.2-11.1.1">Proceed with partial rollback (if the unprepared agent is not
|
||
on the critical path), or<a href="#section-4.3.2-11.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li class="normal" id="section-4.3.2-11.2">
|
||
<p id="section-4.3.2-11.2.1">Abort the rollback and escalate to HITL.<a href="#section-4.3.2-11.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ul>
|
||
</section>
|
||
</div>
|
||
<div id="partial-rollback-handling">
|
||
<section id="section-4.3.3">
|
||
<h4 id="name-partial-rollback-handling">
|
||
<a href="#section-4.3.3" class="section-number selfRef">4.3.3. </a><a href="#name-partial-rollback-handling" class="section-name selfRef">Partial Rollback Handling</a>
|
||
</h4>
|
||
<p id="section-4.3.3-1">When a distributed rollback cannot be completed fully, the
|
||
coordinator <span class="bcp14">MUST</span>:<a href="#section-4.3.3-1" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-4.3.3-2">
|
||
<li id="section-4.3.3-2.1">
|
||
<p id="section-4.3.3-2.1.1">Roll back all agents that responded <code>"prepared"</code>.<a href="#section-4.3.3-2.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.3.3-2.2">
|
||
<p id="section-4.3.3-2.2.1">Record the partial rollback result in the ECT DAG.<a href="#section-4.3.3-2.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.3.3-2.3">
|
||
<p id="section-4.3.3-2.3.1">Emit an ECT with <code>exec_act</code> value <code>"rollback_complete"</code> and
|
||
<code>cascade.status</code> set to <code>"partial"</code>.<a href="#section-4.3.3-2.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.3.3-2.4">
|
||
<p id="section-4.3.3-2.4.1">Include the list of agents that could not be rolled back in
|
||
the <code>cascade.failed_agents</code> extension claim.<a href="#section-4.3.3-2.4.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
</div>
|
||
<div id="conflict-resolution-during-concurrent-rollbacks">
|
||
<section id="section-4.3.4">
|
||
<h4 id="name-conflict-resolution-during-">
|
||
<a href="#section-4.3.4" class="section-number selfRef">4.3.4. </a><a href="#name-conflict-resolution-during-" class="section-name selfRef">Conflict Resolution During Concurrent Rollbacks</a>
|
||
</h4>
|
||
<p id="section-4.3.4-1">When multiple rollback requests target overlapping portions of the
|
||
ECT DAG:<a href="#section-4.3.4-1" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-4.3.4-2">
|
||
<li id="section-4.3.4-2.1">
|
||
<p id="section-4.3.4-2.1.1">The rollback with the broader scope takes precedence (full
|
||
workflow &gt; sub-DAG &gt; single agent).<a href="#section-4.3.4-2.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.3.4-2.2">
|
||
<p id="section-4.3.4-2.2.1">If scopes are equal, the earlier rollback request (by timestamp)
|
||
takes precedence.<a href="#section-4.3.4-2.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-4.3.4-2.3">
|
||
<p id="section-4.3.4-2.3.1">The losing rollback request <span class="bcp14">MUST</span> be rejected with an error
|
||
indicating the conflicting rollback ID.<a href="#section-4.3.4-2.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
<p id="section-4.3.4-3">Agents <span class="bcp14">MUST</span> implement idempotent rollback: receiving the same
|
||
<code>rollback_id</code> twice <span class="bcp14">MUST</span> return the same result without
|
||
re-executing the rollback.<a href="#section-4.3.4-3" class="pilcrow">¶</a></p>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="rollback-evidence">
|
||
<section id="section-4.4">
|
||
<h3 id="name-rollback-evidence">
|
||
<a href="#section-4.4" class="section-number selfRef">4.4. </a><a href="#name-rollback-evidence" class="section-name selfRef">Rollback Evidence</a>
|
||
</h3>
|
||
<div id="ect-nodes-for-rollback-actions">
|
||
<section id="section-4.4.1">
|
||
<h4 id="name-ect-nodes-for-rollback-acti">
|
||
<a href="#section-4.4.1" class="section-number selfRef">4.4.1. </a><a href="#name-ect-nodes-for-rollback-acti" class="section-name selfRef">ECT Nodes for Rollback Actions</a>
|
||
</h4>
|
||
<p id="section-4.4.1-1">Each rollback action <span class="bcp14">MUST</span> produce ECT nodes for audit:<a href="#section-4.4.1-1" class="pilcrow">¶</a></p>
|
||
<span class="break"></span><dl class="dlParallel" id="section-4.4.1-2">
|
||
<dt id="section-4.4.1-2.1">Rollback Start:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-4.4.1-2.2">
|
||
<p id="section-4.4.1-2.2.1"><code>exec_act</code>: <code>"rollback_start"</code>, <code>par</code> references the error ECT
|
||
that triggered the rollback.<a href="#section-4.4.1-2.2.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
</dl>
|
||
<span id="name-rollback-start-ect"></span><div id="fig-rb-start">
|
||
<figure id="figure-11">
|
||
<div class="lang-json sourcecode" id="section-4.4.1-3.1">
|
||
<pre>
|
||
{
|
||
"jti": "rb-start-uuid",
|
||
"exec_act": "rollback_start",
|
||
"par": ["error-ect-uuid"],
|
||
"ext": {
|
||
"cascade.rollback_id": "urn:uuid:...",
|
||
"cascade.checkpoint_id": "ckpt-uuid",
|
||
"cascade.scope": "sub_dag",
|
||
"cascade.reason": "Upstream cascading failure"
|
||
}
|
||
}
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-11" class="selfRef">Figure 11</a>:
|
||
<a href="#name-rollback-start-ect" class="selfRef">Rollback Start ECT</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
<span class="break"></span><dl class="dlParallel" id="section-4.4.1-4">
|
||
<dt id="section-4.4.1-4.1">Rollback Complete:</dt>
|
||
<dd style="margin-left: 1.5em" id="section-4.4.1-4.2">
|
||
<p id="section-4.4.1-4.2.1"><code>exec_act</code>: <code>"rollback_complete"</code>, <code>par</code> references the rollback
|
||
start ECT.<a href="#section-4.4.1-4.2.1" class="pilcrow">¶</a></p>
|
||
</dd>
|
||
<dd class="break"></dd>
|
||
</dl>
|
||
<span id="name-rollback-complete-ect"></span><div id="fig-rb-complete">
|
||
<figure id="figure-12">
|
||
<div class="lang-json sourcecode" id="section-4.4.1-5.1">
|
||
<pre>
|
||
{
|
||
"jti": "rb-complete-uuid",
|
||
"exec_act": "rollback_complete",
|
||
"par": ["rb-start-uuid"],
|
||
"out_hash": "sha256:...",
|
||
"ext": {
|
||
"cascade.rollback_id": "urn:uuid:...",
|
||
"cascade.status": "completed",
|
||
"cascade.state_hash_before": "sha256:...",
|
||
"cascade.state_hash_after": "sha256:...",
|
||
"cascade.cascaded": [
|
||
{
|
||
"agent": "spiffe://example.com/agent/monitor",
|
||
"status": "completed"
|
||
},
|
||
{
|
||
"agent": "spiffe://example.com/agent/classify",
|
||
"status": "escalated"
|
||
}
|
||
]
|
||
}
|
||
}
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-12" class="selfRef">Figure 12</a>:
|
||
<a href="#name-rollback-complete-ect" class="selfRef">Rollback Complete ECT</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="rollback-audit-trail">
|
||
<section id="section-4.4.2">
|
||
<h4 id="name-rollback-audit-trail">
|
||
<a href="#section-4.4.2" class="section-number selfRef">4.4.2. </a><a href="#name-rollback-audit-trail" class="section-name selfRef">Rollback Audit Trail</a>
|
||
</h4>
|
||
<p id="section-4.4.2-1">The complete rollback audit trail is captured in the ECT DAG:<a href="#section-4.4.2-1" class="pilcrow">¶</a></p>
|
||
<span id="name-rollback-audit-trail-in-ect"></span><div id="fig-rb-audit">
|
||
<figure id="figure-13">
|
||
<div class="alignLeft art-text artwork" id="section-4.4.2-2.1">
|
||
<pre>
|
||
error ECT
|
||
│
|
||
▼
|
||
rollback_start ECT
|
||
│
|
||
├──► agent-A rollback_complete ECT
|
||
│
|
||
├──► agent-B rollback_complete ECT
|
||
│
|
||
└──► agent-C compensate ECT
|
||
</pre>
|
||
</div>
|
||
<figcaption><a href="#figure-13" class="selfRef">Figure 13</a>:
|
||
<a href="#name-rollback-audit-trail-in-ect" class="selfRef">Rollback Audit Trail in ECT DAG</a>
|
||
</figcaption></figure>
|
||
</div>
|
||
<p id="section-4.4.2-3">Status values for individual agent rollbacks: <code>completed</code>,
|
||
<code>partial</code>, <code>escalated</code>, <code>failed</code>.<a href="#section-4.4.2-3" class="pilcrow">¶</a></p>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="ect-integration-1">
|
||
<section id="section-5">
|
||
<h2 id="name-ect-integration-2">
|
||
<a href="#section-5" class="section-number selfRef">5. </a><a href="#name-ect-integration-2" class="section-name selfRef">ECT Integration</a>
|
||
</h2>
|
||
<p id="section-5-1">This document defines the following new <code>exec_act</code> values for use
|
||
in ECT nodes <span>[<a href="#I-D.nennemann-wimse-ect" class="cite xref">I-D.nennemann-wimse-ect</a>]</span>:<a href="#section-5-1" class="pilcrow">¶</a></p>
|
||
<span id="name-new-exec_act-values"></span><div id="fig-exec-act-values">
|
||
<table class="center" id="table-1">
|
||
<caption>
|
||
<a href="#table-1" class="selfRef">Table 1</a>:
|
||
<a href="#name-new-exec_act-values" class="selfRef">New exec_act Values</a>
|
||
</caption>
|
||
<thead>
|
||
<tr>
|
||
<th class="text-left" rowspan="1" colspan="1">exec_act Value</th>
|
||
<th class="text-left" rowspan="1" colspan="1">Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>circuit_breaker_open</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Circuit breaker transitioned to OPEN state</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>circuit_breaker_close</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Circuit breaker transitioned to CLOSED state</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>checkpoint</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">State snapshot before consequential action</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>rollback_start</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Rollback initiated for a checkpoint</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>rollback_complete</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Rollback finished (with status)</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>compensate</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Compensating action executed in lieu of state restoration</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade_detected</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Cascading failure pattern detected</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</div>
|
||
<p id="section-5-3">This document defines the following new <code>ext</code> claims for failure
|
||
context:<a href="#section-5-3" class="pilcrow">¶</a></p>
|
||
<span id="name-new-ext-claims-for-cascade-"></span><div id="fig-ext-claims">
|
||
<table class="center" id="table-2">
|
||
<caption>
|
||
<a href="#table-2" class="selfRef">Table 2</a>:
|
||
<a href="#name-new-ext-claims-for-cascade-" class="selfRef">New ext Claims for Cascade Prevention</a>
|
||
</caption>
|
||
<thead>
|
||
<tr>
|
||
<th class="text-left" rowspan="1" colspan="1">Claim</th>
|
||
<th class="text-left" rowspan="1" colspan="1">Type</th>
|
||
<th class="text-left" rowspan="1" colspan="1">Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.downstream_agent</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">string</td>
|
||
<td class="text-left" rowspan="1" colspan="1">SPIFFE ID of the downstream agent</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.error_rate</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">number</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Error rate that triggered the circuit breaker</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.window_s</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">number</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Sliding window duration in seconds</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.cooldown_s</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">number</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Cooldown duration in seconds</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.reversible</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">boolean</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Whether the checkpointed action can be undone</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.rollback_uri</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">string</td>
|
||
<td class="text-left" rowspan="1" colspan="1">URI for rollback requests</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.target</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">string</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Target system of the checkpointed action</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.ttl</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">number</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Checkpoint time-to-live in seconds</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.rollback_id</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">string</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Unique identifier for a rollback operation</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.checkpoint_id</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">string</td>
|
||
<td class="text-left" rowspan="1" colspan="1">JTI of the checkpoint being rolled back</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.scope</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">string</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Rollback scope: single, sub_dag, full_workflow</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.status</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">string</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Rollback result status</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.reason</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">string</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Human-readable reason for the action</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.pattern</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">string</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Detected cascade pattern type</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.affected_agents</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">number</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Count of agents affected by cascade</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.blast_radius</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">array</td>
|
||
<td class="text-left" rowspan="1" colspan="1">SPIFFE IDs of affected agents</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.cascaded</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">array</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Per-agent rollback results</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.failed_agents</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">array</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Agents that could not be rolled back</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.state_hash_before</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">string</td>
|
||
<td class="text-left" rowspan="1" colspan="1">State hash before rollback</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.state_hash_after</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">string</td>
|
||
<td class="text-left" rowspan="1" colspan="1">State hash after rollback</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade.description</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">string</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Human-readable description</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="security-considerations">
|
||
<section id="section-6">
|
||
<h2 id="name-security-considerations">
|
||
<a href="#section-6" class="section-number selfRef">6. </a><a href="#name-security-considerations" class="section-name selfRef">Security Considerations</a>
|
||
</h2>
|
||
<div id="rollback-weaponization">
|
||
<section id="section-6.1">
|
||
<h3 id="name-rollback-weaponization">
|
||
<a href="#section-6.1" class="section-number selfRef">6.1. </a><a href="#name-rollback-weaponization" class="section-name selfRef">Rollback Weaponization</a>
|
||
</h3>
|
||
<p id="section-6.1-1">Malicious agents could attempt to force unnecessary rollbacks to
|
||
disrupt workflows. Mitigations:<a href="#section-6.1-1" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-6.1-2">
|
||
<li id="section-6.1-2.1">
|
||
<p id="section-6.1-2.1.1">Rollback requests <span class="bcp14">MUST</span> be authenticated via the ECT signature
|
||
chain. Only agents whose ECTs appear in the same workflow DAG
|
||
(identified by <code>wid</code>) are authorized to request rollback.<a href="#section-6.1-2.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-6.1-2.2">
|
||
<p id="section-6.1-2.2.1">Rollback requests from outside the originating workflow <span class="bcp14">MUST</span> be
|
||
rejected with HTTP 403.<a href="#section-6.1-2.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-6.1-2.3">
|
||
<p id="section-6.1-2.3.1">Agents <span class="bcp14">SHOULD</span> implement rate limiting on rollback requests to
|
||
prevent denial-of-service through rollback flooding.<a href="#section-6.1-2.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-6.1-2.4">
|
||
<p id="section-6.1-2.4.1">The two-phase rollback protocol provides a prepare phase where
|
||
agents can validate the rollback request before committing.<a href="#section-6.1-2.4.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
</div>
|
||
<div id="circuit-breaker-manipulation">
|
||
<section id="section-6.2">
|
||
<h3 id="name-circuit-breaker-manipulatio">
|
||
<a href="#section-6.2" class="section-number selfRef">6.2. </a><a href="#name-circuit-breaker-manipulatio" class="section-name selfRef">Circuit Breaker Manipulation</a>
|
||
</h3>
|
||
<p id="section-6.2-1">An adversary could attempt to manipulate circuit breaker state to
|
||
either prevent legitimate circuit breaking or force unnecessary
|
||
circuit breaks:<a href="#section-6.2-1" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-6.2-2">
|
||
<li id="section-6.2-2.1">
|
||
<p id="section-6.2-2.1.1"><strong>False error injection</strong>: A malicious agent could emit false
|
||
error ECTs to trigger circuit breakers. At L2/L3
|
||
<span>[<a href="#I-D.nennemann-wimse-ect" class="cite xref">I-D.nennemann-wimse-ect</a>]</span>, ECT signatures prevent forgery.
|
||
Agents <span class="bcp14">SHOULD</span> verify that error ECTs reference valid <code>par</code>
|
||
values within their own workflow DAG.<a href="#section-6.2-2.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-6.2-2.2">
|
||
<p id="section-6.2-2.2.1"><strong>Circuit breaker suppression</strong>: An adversary could attempt to
|
||
reset circuit breakers by sending successful probe responses.
|
||
Agents <span class="bcp14">MUST</span> only accept probe responses from the actual
|
||
downstream agent (verified via ECT identity binding).<a href="#section-6.2-2.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-6.2-2.3">
|
||
<p id="section-6.2-2.3.1"><strong>Status endpoint abuse</strong>: The <code>/.well-known/cascade/circuits</code>
|
||
endpoint reveals system health topology. This endpoint <span class="bcp14">MUST</span>
|
||
require authentication and <span class="bcp14">SHOULD</span> be restricted to agents within
|
||
the same administrative domain.<a href="#section-6.2-2.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
</div>
|
||
<div id="checkpoint-integrity">
|
||
<section id="section-6.3">
|
||
<h3 id="name-checkpoint-integrity">
|
||
<a href="#section-6.3" class="section-number selfRef">6.3. </a><a href="#name-checkpoint-integrity" class="section-name selfRef">Checkpoint Integrity</a>
|
||
</h3>
|
||
<p id="section-6.3-1">Checkpoint state snapshots contain sensitive system state. Agents
|
||
<span class="bcp14">MUST</span>:<a href="#section-6.3-1" class="pilcrow">¶</a></p>
|
||
<ol start="1" type="1" class="normal type-1" id="section-6.3-2">
|
||
<li id="section-6.3-2.1">
|
||
<p id="section-6.3-2.1.1">Encrypt stored checkpoint state at rest.<a href="#section-6.3-2.1.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-6.3-2.2">
|
||
<p id="section-6.3-2.2.1">Reference checkpoint state in ECTs only via <code>out_hash</code>; agents <span class="bcp14">MUST NOT</span>
|
||
include checkpoint contents in ECT claims.<a href="#section-6.3-2.2.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-6.3-2.3">
|
||
<p id="section-6.3-2.3.1">Verify <code>out_hash</code> integrity before executing rollback to prevent
|
||
rollback to a tampered state.<a href="#section-6.3-2.3.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-6.3-2.4">
|
||
<p id="section-6.3-2.4.1">Enforce checkpoint storage quotas to prevent checkpoint flooding
|
||
attacks.<a href="#section-6.3-2.4.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
<li id="section-6.3-2.5">
|
||
<p id="section-6.3-2.5.1">Purge expired checkpoints (past <code>cascade.ttl</code>).<a href="#section-6.3-2.5.1" class="pilcrow">¶</a></p>
|
||
</li>
|
||
</ol>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="iana-considerations">
|
||
<section id="section-7">
|
||
<h2 id="name-iana-considerations">
|
||
<a href="#section-7" class="section-number selfRef">7. </a><a href="#name-iana-considerations" class="section-name selfRef">IANA Considerations</a>
|
||
</h2>
|
||
<div id="registration-of-execact-values">
|
||
<section id="section-7.1">
|
||
<h3 id="name-registration-of-exec_act-va">
|
||
<a href="#section-7.1" class="section-number selfRef">7.1. </a><a href="#name-registration-of-exec_act-va" class="section-name selfRef">Registration of exec_act Values</a>
|
||
</h3>
|
||
<p id="section-7.1-1">This document requests registration of the following <code>exec_act</code>
|
||
values in the ECT exec_act registry:<a href="#section-7.1-1" class="pilcrow">¶</a></p>
|
||
<span id="name-exec_act-value-registration"></span><div id="fig-iana-exec-act">
|
||
<table class="center" id="table-3">
|
||
<caption>
|
||
<a href="#table-3" class="selfRef">Table 3</a>:
|
||
<a href="#name-exec_act-value-registration" class="selfRef">exec_act Value Registrations</a>
|
||
</caption>
|
||
<thead>
|
||
<tr>
|
||
<th class="text-left" rowspan="1" colspan="1">Value</th>
|
||
<th class="text-left" rowspan="1" colspan="1">Description</th>
|
||
<th class="text-left" rowspan="1" colspan="1">Reference</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>circuit_breaker_open</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Circuit breaker transitioned to OPEN</td>
|
||
<td class="text-left" rowspan="1" colspan="1">This document</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>circuit_breaker_close</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Circuit breaker transitioned to CLOSED</td>
|
||
<td class="text-left" rowspan="1" colspan="1">This document</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>checkpoint</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">State snapshot before consequential action</td>
|
||
<td class="text-left" rowspan="1" colspan="1">This document</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>rollback_start</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Rollback operation initiated</td>
|
||
<td class="text-left" rowspan="1" colspan="1">This document</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>rollback_complete</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Rollback operation finished</td>
|
||
<td class="text-left" rowspan="1" colspan="1">This document</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>compensate</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Compensating action executed</td>
|
||
<td class="text-left" rowspan="1" colspan="1">This document</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade_detected</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Cascading failure pattern detected</td>
|
||
<td class="text-left" rowspan="1" colspan="1">This document</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="registration-of-ext-claims">
|
||
<section id="section-7.2">
|
||
<h3 id="name-registration-of-ext-claims">
|
||
<a href="#section-7.2" class="section-number selfRef">7.2. </a><a href="#name-registration-of-ext-claims" class="section-name selfRef">Registration of ext Claims</a>
|
||
</h3>
|
||
<p id="section-7.2-1">This document requests registration of the <code>ext</code> claims listed in
|
||
<a href="#fig-ext-claims" class="auto internal xref">Table 2</a> in the ECT extension claims registry. All claims
|
||
use the <code>cascade.</code> namespace prefix.<a href="#section-7.2-1" class="pilcrow">¶</a></p>
|
||
</section>
|
||
</div>
|
||
<div id="well-known-uri-registration">
|
||
<section id="section-7.3">
|
||
<h3 id="name-well-known-uri-registration">
|
||
<a href="#section-7.3" class="section-number selfRef">7.3. </a><a href="#name-well-known-uri-registration" class="section-name selfRef">Well-Known URI Registration</a>
|
||
</h3>
|
||
<p id="section-7.3-1">This document requests registration of the following well-known URI
|
||
suffixes in the "Well-Known URIs" registry established by RFC 8615, for use with HTTP <span>[<a href="#RFC9110" class="cite xref">RFC9110</a>]</span>:<a href="#section-7.3-1" class="pilcrow">¶</a></p>
|
||
<span id="name-well-known-uri-registrations"></span><div id="fig-iana-uris">
|
||
<table class="center" id="table-4">
|
||
<caption>
|
||
<a href="#table-4" class="selfRef">Table 4</a>:
|
||
<a href="#name-well-known-uri-registrations" class="selfRef">Well-Known URI Registrations</a>
|
||
</caption>
|
||
<thead>
|
||
<tr>
|
||
<th class="text-left" rowspan="1" colspan="1">URI Suffix</th>
|
||
<th class="text-left" rowspan="1" colspan="1">Description</th>
|
||
<th class="text-left" rowspan="1" colspan="1">Reference</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade/circuits</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Circuit breaker status</td>
|
||
<td class="text-left" rowspan="1" colspan="1">This document</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade/rollback</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Rollback request endpoint</td>
|
||
<td class="text-left" rowspan="1" colspan="1">This document</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade/rollback/prepare</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Rollback prepare endpoint</td>
|
||
<td class="text-left" rowspan="1" colspan="1">This document</td>
|
||
</tr>
|
||
<tr>
|
||
<td class="text-left" rowspan="1" colspan="1">
|
||
<code>cascade/checkpoints</code>
|
||
</td>
|
||
<td class="text-left" rowspan="1" colspan="1">Checkpoint retrieval</td>
|
||
<td class="text-left" rowspan="1" colspan="1">This document</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="sec-combined-references">
|
||
<section id="section-8">
|
||
<h2 id="name-references">
|
||
<a href="#section-8" class="section-number selfRef">8. </a><a href="#name-references" class="section-name selfRef">References</a>
|
||
</h2>
|
||
<div id="sec-normative-references">
|
||
<section id="section-8.1">
|
||
<h3 id="name-normative-references">
|
||
<a href="#section-8.1" class="section-number selfRef">8.1. </a><a href="#name-normative-references" class="section-name selfRef">Normative References</a>
|
||
</h3>
|
||
<dl class="references">
|
||
<dt id="I-D.nennemann-agent-dag-hitl-safety">[I-D.nennemann-agent-dag-hitl-safety]</dt>
|
||
<dd>
|
||
<span class="refTitle">"Agent Context Policy Token: DAG Delegation with Human Override"</span>, <span>n.d.</span>, <span>&lt;<a href="https://datatracker.ietf.org/doc/draft-nennemann-agent-dag-hitl-safety/">https://datatracker.ietf.org/doc/draft-nennemann-agent-dag-hitl-safety/</a>&gt;</span>. </dd>
|
||
<dd class="break"></dd>
|
||
<dt id="I-D.nennemann-wimse-ect">[I-D.nennemann-wimse-ect]</dt>
|
||
<dd>
|
||
<span class="refTitle">"Execution Context Tokens for Distributed Agentic Workflows"</span>, <span>n.d.</span>, <span>&lt;<a href="https://datatracker.ietf.org/doc/draft-nennemann-wimse-ect/">https://datatracker.ietf.org/doc/draft-nennemann-wimse-ect/</a>&gt;</span>. </dd>
|
||
<dd class="break"></dd>
|
||
<dt id="RFC2119">[RFC2119]</dt>
|
||
<dd>
|
||
<span class="refAuthor">Bradner, S.</span>, <span class="refTitle">"Key words for use in RFCs to Indicate Requirement Levels"</span>, <span class="seriesInfo">BCP 14</span>, <span class="seriesInfo">RFC 2119</span>, <span class="seriesInfo">DOI 10.17487/RFC2119</span>, <time datetime="1997-03" class="refDate">March 1997</time>, <span>&lt;<a href="https://www.rfc-editor.org/rfc/rfc2119">https://www.rfc-editor.org/rfc/rfc2119</a>&gt;</span>. </dd>
|
||
<dd class="break"></dd>
|
||
<dt id="RFC7515">[RFC7515]</dt>
|
||
<dd>
|
||
<span class="refAuthor">Jones, M.</span>, <span class="refAuthor">Bradley, J.</span>, and <span class="refAuthor">N. Sakimura</span>, <span class="refTitle">"JSON Web Signature (JWS)"</span>, <span class="seriesInfo">RFC 7515</span>, <span class="seriesInfo">DOI 10.17487/RFC7515</span>, <time datetime="2015-05" class="refDate">May 2015</time>, <span>&lt;<a href="https://www.rfc-editor.org/rfc/rfc7515">https://www.rfc-editor.org/rfc/rfc7515</a>&gt;</span>. </dd>
|
||
<dd class="break"></dd>
|
||
<dt id="RFC7519">[RFC7519]</dt>
|
||
<dd>
|
||
<span class="refAuthor">Jones, M.</span>, <span class="refAuthor">Bradley, J.</span>, and <span class="refAuthor">N. Sakimura</span>, <span class="refTitle">"JSON Web Token (JWT)"</span>, <span class="seriesInfo">RFC 7519</span>, <span class="seriesInfo">DOI 10.17487/RFC7519</span>, <time datetime="2015-05" class="refDate">May 2015</time>, <span>&lt;<a href="https://www.rfc-editor.org/rfc/rfc7519">https://www.rfc-editor.org/rfc/rfc7519</a>&gt;</span>. </dd>
|
||
<dd class="break"></dd>
|
||
<dt id="RFC8174">[RFC8174]</dt>
|
||
<dd>
|
||
<span class="refAuthor">Leiba, B.</span>, <span class="refTitle">"Ambiguity of Uppercase vs Lowercase in RFC 2119 Key Words"</span>, <span class="seriesInfo">BCP 14</span>, <span class="seriesInfo">RFC 8174</span>, <span class="seriesInfo">DOI 10.17487/RFC8174</span>, <time datetime="2017-05" class="refDate">May 2017</time>, <span>&lt;<a href="https://www.rfc-editor.org/rfc/rfc8174">https://www.rfc-editor.org/rfc/rfc8174</a>&gt;</span>. </dd>
|
||
<dd class="break"></dd>
|
||
<dt id="RFC9110">[RFC9110]</dt>
|
||
<dd>
|
||
<span class="refAuthor">Fielding, R., Ed.</span>, <span class="refAuthor">Nottingham, M., Ed.</span>, and <span class="refAuthor">J. Reschke, Ed.</span>, <span class="refTitle">"HTTP Semantics"</span>, <span class="seriesInfo">STD 97</span>, <span class="seriesInfo">RFC 9110</span>, <span class="seriesInfo">DOI 10.17487/RFC9110</span>, <time datetime="2022-06" class="refDate">June 2022</time>, <span>&lt;<a href="https://www.rfc-editor.org/rfc/rfc9110">https://www.rfc-editor.org/rfc/rfc9110</a>&gt;</span>. </dd>
|
||
<dd class="break"></dd>
|
||
</dl>
|
||
</section>
|
||
</div>
|
||
<div id="sec-informative-references">
|
||
<section id="section-8.2">
|
||
<h3 id="name-informative-references">
|
||
<a href="#section-8.2" class="section-number selfRef">8.2. </a><a href="#name-informative-references" class="section-name selfRef">Informative References</a>
|
||
</h3>
|
||
<dl class="references">
|
||
<dt id="I-D.nennemann-agent-gap-analysis">[I-D.nennemann-agent-gap-analysis]</dt>
|
||
<dd>
|
||
<span class="refTitle">"Gap Analysis of IETF Standards for Autonomous AI Agent Networking"</span>, <span>n.d.</span>, <span>&lt;<a href="https://datatracker.ietf.org/doc/draft-nennemann-agent-gap-analysis/">https://datatracker.ietf.org/doc/draft-nennemann-agent-gap-analysis/</a>&gt;</span>. </dd>
|
||
<dd class="break"></dd>
|
||
</dl>
|
||
</section>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<div id="acknowledgments">
|
||
<section id="appendix-A">
|
||
<h2 id="name-acknowledgments">
|
||
<a href="#name-acknowledgments" class="section-name selfRef">Acknowledgments</a>
|
||
</h2>
|
||
<p id="appendix-A-1">This document absorbs and supersedes concepts from the earlier Agent
|
||
Error Recovery and Rollback (AERR) and Agent Task DAG (ATD) proposals.
|
||
It builds on the Execution Context Token specification
|
||
<span>[<a href="#I-D.nennemann-wimse-ect" class="cite xref">I-D.nennemann-wimse-ect</a>]</span> for DAG-based audit trails and the Agent
|
||
Context Policy Token <span>[<a href="#I-D.nennemann-agent-dag-hitl-safety" class="cite xref">I-D.nennemann-agent-dag-hitl-safety</a>]</span> for HITL
|
||
escalation of irreversible actions. The circuit breaker pattern is
|
||
adapted from microservice architecture best practices.<a href="#appendix-A-1" class="pilcrow">¶</a></p>
|
||
</section>
|
||
</div>
|
||
<div id="authors-addresses">
|
||
<section id="appendix-B">
|
||
<h2 id="name-authors-address">
|
||
<a href="#name-authors-address" class="section-name selfRef">Author's Address</a>
|
||
</h2>
|
||
<address class="vcard">
|
||
<div dir="auto" class="left"><span class="fn nameRole">Christian Nennemann</span></div>
|
||
<div dir="auto" class="left"><span class="org">Independent Researcher</span></div>
|
||
<div class="email">
|
||
<span>Email:</span>
|
||
<a href="mailto:ietf@nennemann.de" class="email">ietf@nennemann.de</a>
|
||
</div>
|
||
</address>
|
||
</section>
|
||
</div>
|
||
<script>
// Table-of-contents toggle. Every lookup is guarded so that a page without a
// #toc element (or without its <h2>/<nav> children) does not throw at load.
const toc = document.getElementById("toc");
if (toc) {
  const tocHeading = toc.querySelector("h2");
  const tocNav = toc.querySelector("nav");
  if (tocHeading) {
    // Clicking the TOC heading flips the "active" class on the TOC container.
    tocHeading.addEventListener("click", () => {
      toc.classList.toggle("active");
    });
  }
  if (tocNav) {
    // Any click inside the TOC navigation clears the "active" class again.
    tocNav.addEventListener("click", () => {
      toc.classList.remove("active");
    });
  }
}
</script>
|
||
</body>
|
||
</html>
|