Cooperative IRL (CIRL)
cirl · approach · Path: /knowledge-base/responses/cirl/
Entity ID (EID): E586
Page Record: database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
"id": "cirl",
"wikiId": "E586",
"path": "/knowledge-base/responses/cirl/",
"filePath": "knowledge-base/responses/cirl.mdx",
"title": "Cooperative IRL (CIRL)",
"quality": 65,
"readerImportance": 25,
"researchImportance": 8,
"tacticalValue": null,
"contentFormat": "article",
"causalLevel": null,
"lastUpdated": "2026-01-28",
"dateCreated": "2026-02-15",
"summary": "CIRL is a theoretical framework where AI systems maintain uncertainty about human preferences, which naturally incentivizes corrigibility and deference. Despite elegant theory with formal proofs, the approach faces a substantial theory-practice gap with no production deployments and only \\$1-5M/year in academic investment, making it more influential for conceptual foundations than immediate intervention design.",
"description": "Cooperative Inverse Reinforcement Learning (CIRL) is a theoretical framework where AI systems maintain uncertainty about human preferences and cooperatively learn them through interaction.",
"ratings": {
"novelty": 3.5,
"rigor": 5,
"completeness": 6,
"actionability": 3
},
"category": "responses",
"subcategory": "alignment-theoretical",
"clusters": [
"ai-safety"
],
"metrics": {
"wordCount": 1944,
"tableCount": 21,
"diagramCount": 1,
"internalLinks": 14,
"externalLinks": 11,
"footnoteCount": 0,
"bulletRatio": 0.05,
"sectionCount": 32,
"hasOverview": true,
"structuralScore": 15
},
"suggestedQuality": 100,
"updateFrequency": 90,
"evergreen": true,
"wordCount": 1944,
"unconvertedLinks": [
{
"text": "Hadfield-Menell et al., 2017",
"url": "https://arxiv.org/abs/1611.08219",
"resourceId": "026569778403629b",
"resourceTitle": "Hadfield-Menell et al. (2017)"
},
{
"text": "Cooperative Inverse Reinforcement Learning",
"url": "https://arxiv.org/abs/1606.03137",
"resourceId": "821f65afa4c681ca",
"resourceTitle": "Hadfield-Menell et al. (2016)"
},
{
"text": "The Off-Switch Game",
"url": "https://arxiv.org/abs/1611.08219",
"resourceId": "026569778403629b",
"resourceTitle": "Hadfield-Menell et al. (2017)"
},
{
"text": "Incorrigibility in the CIRL Framework",
"url": "https://intelligence.org/2017/08/31/incorrigibility-in-cirl/",
"resourceId": "3e250a28699df556",
"resourceTitle": "CIRL corrigibility proved fragile"
}
],
"unconvertedLinkCount": 4,
"convertedLinkCount": 0,
"backlinkCount": 3,
"hallucinationRisk": {
"level": "medium",
"score": 45,
"factors": [
"no-citations",
"conceptual-content"
]
},
"entityType": "approach",
"redundancy": {
"maxSimilarity": 15,
"similarPages": [
{
"id": "chai",
"title": "Center for Human-Compatible AI (CHAI)",
"path": "/knowledge-base/organizations/chai/",
"similarity": 15
},
{
"id": "cooperative-ai",
"title": "Cooperative AI",
"path": "/knowledge-base/responses/cooperative-ai/",
"similarity": 14
},
{
"id": "debate",
"title": "AI Safety via Debate",
"path": "/knowledge-base/responses/debate/",
"similarity": 14
},
{
"id": "instrumental-convergence-framework",
"title": "Instrumental Convergence Framework",
"path": "/knowledge-base/models/instrumental-convergence-framework/",
"similarity": 13
},
{
"id": "deceptive-alignment-decomposition",
"title": "Deceptive Alignment Decomposition Model",
"path": "/knowledge-base/models/deceptive-alignment-decomposition/",
"similarity": 12
}
]
},
"coverage": {
"passing": 7,
"total": 13,
"targets": {
"tables": 8,
"diagrams": 1,
"internalLinks": 16,
"externalLinks": 10,
"footnotes": 6,
"references": 6
},
"actuals": {
"tables": 21,
"diagrams": 1,
"internalLinks": 14,
"externalLinks": 11,
"footnotes": 0,
"references": 3,
"quotesWithQuotes": 0,
"quotesTotal": 0,
"accuracyChecked": 0,
"accuracyTotal": 0
},
"items": {
"summary": "green",
"schedule": "green",
"entity": "green",
"editHistory": "red",
"overview": "green",
"tables": "green",
"diagrams": "green",
"internalLinks": "amber",
"externalLinks": "green",
"footnotes": "red",
"references": "amber",
"quotes": "red",
"accuracy": "red"
},
"ratingsString": "N:3.5 R:5 A:3 C:6"
},
"readerRank": 488,
"researchRank": 563,
"recommendedScore": 155.71
}
External Links
No external links
Backlinks (3)
| id | title | type | relationship |
|---|---|---|---|
| autonomous-cooperative-agents | Autonomous Cooperative Agents | concept | — |
| alignment-theoretical-overview | Theoretical Foundations (Overview) | concept | — |
| cooperative-ai | Cooperative AI | approach | — |