AI Accident Risk Cruxes
accident-risks · crux · Path: /knowledge-base/cruxes/accident-risks/
E394 — Entity ID (EID)
Page Record: database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
{
"id": "accident-risks",
"wikiId": "E394",
"path": "/knowledge-base/cruxes/accident-risks/",
"filePath": "knowledge-base/cruxes/accident-risks.mdx",
"title": "AI Accident Risk Cruxes",
"quality": 67,
"readerImportance": 93.5,
"researchImportance": 95,
"tacticalValue": 55,
"contentFormat": "article",
"causalLevel": null,
"lastUpdated": "2026-03-15",
"dateCreated": "2026-02-15",
"summary": "Comprehensive survey of AI safety researcher disagreements on accident risks, quantifying probability ranges for mesa-optimization (15-55%), deceptive alignment (15-50%), and P(doom) (5-35% median across populations). Integrates 2024-2025 empirical breakthroughs including Anthropic's Sleeper Agents study (backdoors persist through safety training, >99% AUROC detection) and SAD benchmark showing rapid situational awareness advances (Claude Sonnet 4.5: 58% evaluation detection vs 22% for Opus 4.1). Expanded to cover near-term concrete accident risk vectors: adversarial robustness failures, safe exploration failures in RL, prompt injection and agentic security vulnerabilities, instruction hierarchy violations, and human-facing safety issues in clinical and consumer AI contexts.",
"description": "Key uncertainties that determine views on AI accident risks and alignment difficulty, including questions about mesa-optimization, deceptive alignment, adversarial robustness, safe exploration, prompt injection in agentic systems, and near-term concrete safety failures.",
"ratings": {
"novelty": 5.2,
"rigor": 6.8,
"completeness": 7.5,
"actionability": 7.3
},
"category": "cruxes",
"subcategory": null,
"clusters": [
"ai-safety",
"governance"
],
"metrics": {
"wordCount": 6377,
"tableCount": 26,
"diagramCount": 1,
"internalLinks": 115,
"externalLinks": 74,
"footnoteCount": 0,
"bulletRatio": 0.09,
"sectionCount": 43,
"hasOverview": true,
"structuralScore": 15
},
"suggestedQuality": 100,
"updateFrequency": 45,
"evergreen": true,
"wordCount": 6377,
"unconvertedLinks": [
{
"text": "2025 Expert Survey",
"url": "https://arxiv.org/html/2502.14870v1",
"resourceId": "4e7f0e37bace9678",
"resourceTitle": "Roman Yampolskiy"
},
{
"text": "AI Impacts 2023 survey",
"url": "https://wiki.aiimpacts.org/ai_timelines/predictions_of_human-level_ai_timelines/ai_timeline_surveys/2023_expert_survey_on_progress_in_ai",
"resourceId": "b4342da2ca0d2721",
"resourceTitle": "AI Impacts 2023 survey"
},
{
"text": "MIRI research",
"url": "https://intelligence.org/learned-optimization/",
"resourceId": "e573623625e9d5d2",
"resourceTitle": "Learned Optimization - Machine Intelligence Research Institute"
},
{
"text": "Anthropic Sleeper Agents (2024)",
"url": "https://arxiv.org/abs/2401.05566",
"resourceId": "e5c0904211c7d0cc",
"resourceTitle": "Sleeper Agents: Training Deceptive LLMs that Persist Through Safety Training"
},
{
"text": "OpenAI Superalignment",
"url": "https://openai.com/index/superalignment-fast-grants/",
"resourceId": "82eb0a4b47c95d2a",
"resourceTitle": "OpenAI Superalignment Fast Grants"
},
{
"text": "2025 AI Safety Index",
"url": "https://futureoflife.org/ai-safety-index-summer-2025/",
"resourceId": "df46edd6fa2078d1",
"resourceTitle": "FLI AI Safety Index Summer 2025"
},
{
"text": "2023 AI Impacts survey",
"url": "https://wiki.aiimpacts.org/ai_timelines/predictions_of_human-level_ai_timelines/ai_timeline_surveys/2023_expert_survey_on_progress_in_ai",
"resourceId": "b4342da2ca0d2721",
"resourceTitle": "AI Impacts 2023 survey"
},
{
"text": "AI Impacts Survey",
"url": "https://wiki.aiimpacts.org/ai_timelines/predictions_of_human-level_ai_timelines/ai_timeline_surveys/2023_expert_survey_on_progress_in_ai",
"resourceId": "b4342da2ca0d2721",
"resourceTitle": "AI Impacts 2023 survey"
},
{
"text": "EA Forum Survey",
"url": "https://forum.effectivealtruism.org/posts/8CM9vZ2nnQsWJNsHx/existential-risk-from-ai-survey-results",
"resourceId": "0dee84dcc4f4076f",
"resourceTitle": "\"Existential risk from AI\" survey results"
},
{
"text": "EA Forum Survey",
"url": "https://forum.effectivealtruism.org/posts/8CM9vZ2nnQsWJNsHx/existential-risk-from-ai-survey-results",
"resourceId": "0dee84dcc4f4076f",
"resourceTitle": "\"Existential risk from AI\" survey results"
},
{
"text": "EA Forum Survey",
"url": "https://forum.effectivealtruism.org/posts/8CM9vZ2nnQsWJNsHx/existential-risk-from-ai-survey-results",
"resourceId": "0dee84dcc4f4076f",
"resourceTitle": "\"Existential risk from AI\" survey results"
},
{
"text": "arXiv Expert Survey",
"url": "https://arxiv.org/html/2502.14870v1",
"resourceId": "4e7f0e37bace9678",
"resourceTitle": "Roman Yampolskiy"
},
{
"text": "10-20%",
"url": "https://en.wikipedia.org/wiki/P(doom)",
"resourceId": "ffb7dcedaa0a8711",
"resourceTitle": "Survey of AI researchers"
},
{
"text": "Greenblatt et al. (2024)",
"url": "https://www.anthropic.com/research/alignment-faking",
"resourceId": "c2cfd72baafd64a9",
"resourceTitle": "Anthropic's 2024 alignment faking study"
},
{
"text": "Anthropic's 2025 research recommendations",
"url": "https://alignment.anthropic.com/2025/recommended-directions/",
"resourceId": "7ae6b3be2d2043c1",
"resourceTitle": "Anthropic: Recommended Directions for AI Safety Research"
},
{
"text": "MATS program",
"url": "https://www.matsprogram.org/",
"resourceId": "ba3a8bd9c8404d7b",
"resourceTitle": "MATS Research Program"
},
{
"text": "AI Safety Index",
"url": "https://futureoflife.org/ai-safety-index-summer-2025/",
"resourceId": "df46edd6fa2078d1",
"resourceTitle": "FLI AI Safety Index Summer 2025"
},
{
"text": "Anthropic study",
"url": "https://arxiv.org/abs/2401.05566",
"resourceId": "e5c0904211c7d0cc",
"resourceTitle": "Sleeper Agents: Training Deceptive LLMs that Persist Through Safety Training"
},
{
"text": "Simple probes",
"url": "https://www.anthropic.com/research/probes-catch-sleeper-agents",
"resourceId": "72c1254d07071bf7",
"resourceTitle": "Anthropic's follow-up research on defection probes"
},
{
"text": "Greenblatt et al. 2024",
"url": "https://www.anthropic.com/research/alignment-faking",
"resourceId": "c2cfd72baafd64a9",
"resourceTitle": "Anthropic's 2024 alignment faking study"
},
{
"text": "Process supervision",
"url": "https://arxiv.org/abs/2305.20050",
"resourceId": "eea50d24e41938ed",
"resourceTitle": "OpenAI's influential \"Let's Verify Step by Step\" study"
},
{
"text": "AI Safety Index (Summer 2025)",
"url": "https://futureoflife.org/ai-safety-index-summer-2025/",
"resourceId": "df46edd6fa2078d1",
"resourceTitle": "FLI AI Safety Index Summer 2025"
},
{
"text": "February 2025 arXiv study",
"url": "https://arxiv.org/html/2502.14870v1",
"resourceId": "4e7f0e37bace9678",
"resourceTitle": "Roman Yampolskiy"
}
],
"unconvertedLinkCount": 23,
"convertedLinkCount": 26,
"backlinkCount": 4,
"hallucinationRisk": {
"level": "medium",
"score": 45,
"factors": [
"no-citations",
"conceptual-content"
]
},
"entityType": "crux",
"redundancy": {
"maxSimilarity": 22,
"similarPages": [
{
"id": "why-alignment-hard",
"title": "Why Alignment Might Be Hard",
"path": "/knowledge-base/debates/why-alignment-hard/",
"similarity": 22
},
{
"id": "agentic-ai",
"title": "Agentic AI",
"path": "/knowledge-base/capabilities/agentic-ai/",
"similarity": 21
},
{
"id": "language-models",
"title": "Large Language Models",
"path": "/knowledge-base/capabilities/language-models/",
"similarity": 21
},
{
"id": "solutions",
"title": "AI Safety Solution Cruxes",
"path": "/knowledge-base/cruxes/solutions/",
"similarity": 21
},
{
"id": "is-ai-xrisk-real",
"title": "Is AI Existential Risk Real?",
"path": "/knowledge-base/debates/is-ai-xrisk-real/",
"similarity": 21
}
]
},
"changeHistory": [
{
"date": "2026-03-15",
"branch": "auto-update/2026-03-15",
"title": "Auto-improve (standard): AI Accident Risk Cruxes",
"summary": "Improved \"AI Accident Risk Cruxes\" via standard pipeline (496.7s). Quality score: 71. Issues resolved: Truncated content: The page ends mid-sentence in the Researc; EntityLink E1 ('adversarial-robustness') and E2 ('agentic-ai; EntityLink E1030 ('claude-3-5-sonnet') is used in Quick Asse.",
"duration": "496.7s",
"cost": "$5-8"
},
{
"date": "2026-03-14",
"branch": "auto-update/2026-03-14",
"title": "Auto-improve (standard): AI Accident Risk Cruxes",
"summary": "Improved \"AI Accident Risk Cruxes\" via standard pipeline (472.9s). Quality score: 71. Issues resolved: Truncated content: The page ends mid-sentence in the 'Resear; MDX Mermaid chart contains EntityLink JSX components inside ; Comparison pattern violation: description frontmatter contai.",
"duration": "472.9s",
"cost": "$5-8"
},
{
"date": "2026-03-12",
"branch": "auto-update/2026-03-12",
"title": "Auto-improve (standard): AI Accident Risk Cruxes",
"summary": "Improved \"AI Accident Risk Cruxes\" via standard pipeline (480.9s). Quality score: 74. Issues resolved: Frontmatter description field contains raw '<' character in ; Mermaid chart contains an EntityLink MDX component inside a ; Table in 'Sources and Resources > 2024-2025 Key Research' ha.",
"duration": "480.9s",
"cost": "$5-8"
}
],
"coverage": {
"passing": 9,
"total": 13,
"targets": {
"tables": 26,
"diagrams": 3,
"internalLinks": 51,
"externalLinks": 32,
"footnotes": 19,
"references": 19
},
"actuals": {
"tables": 26,
"diagrams": 1,
"internalLinks": 115,
"externalLinks": 74,
"footnotes": 0,
"references": 42,
"quotesWithQuotes": 0,
"quotesTotal": 0,
"accuracyChecked": 0,
"accuracyTotal": 0
},
"items": {
"summary": "green",
"schedule": "green",
"entity": "green",
"editHistory": "green",
"overview": "green",
"tables": "green",
"diagrams": "amber",
"internalLinks": "green",
"externalLinks": "green",
"footnotes": "red",
"references": "green",
"quotes": "red",
"accuracy": "red"
},
"editHistoryCount": 3,
"ratingsString": "N:5.2 R:6.8 A:7.3 C:7.5"
},
"readerRank": 5,
"researchRank": 1,
"recommendedScore": 199.51
}
External Links
{
"lesswrong": "https://www.lesswrong.com/tag/ai-risk"
}
Backlinks (4)
| id | title | type | relationship |
|---|---|---|---|
| __index__/knowledge-base/cruxes | Key Cruxes | concept | — |
| __index__/knowledge-base | Knowledge Base | concept | — |
| risk-activation-timeline | Risk Activation Timeline Model | analysis | — |
| rlhf | RLHF / Constitutional AI | research-area | — |