Sharp Left Turn
sharp-left-turn · risk · Path: /knowledge-base/risks/sharp-left-turn/
Entity ID (EID): E281
Page Record (database.json) — merged from MDX frontmatter + Entity YAML + computed metrics at build time
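The merge described above can be pictured with a short build-time sketch. This is an illustration only: `gray-matter` is one common frontmatter parser, and `buildPageRecord` plus its inputs are hypothetical names, not this site's actual build code.

```typescript
// Hypothetical build step: merge MDX frontmatter, entity YAML, and
// computed metrics into one page record. Later sources win on collisions.
import { readFileSync } from "node:fs";
import matter from "gray-matter"; // common frontmatter parser (assumption)

interface PageRecord {
  id: string;
  wikiId: string;
  path: string;
  [key: string]: unknown; // remaining merged fields
}

function buildPageRecord(
  mdxPath: string,
  entityYaml: Record<string, unknown>
): PageRecord {
  const { data: frontmatter, content } = matter(readFileSync(mdxPath, "utf8"));

  // One computed metric shown; tableCount, internalLinks, etc. would be
  // derived from `content` in the same way.
  const wordCount = content.split(/\s+/).filter(Boolean).length;

  return {
    ...frontmatter, // MDX frontmatter first
    ...entityYaml,  // entity YAML overrides
    filePath: mdxPath,
    metrics: { wordCount }, // computed at build time
  } as PageRecord;
}
```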
{
"id": "sharp-left-turn",
"wikiId": "E281",
"path": "/knowledge-base/risks/sharp-left-turn/",
"filePath": "knowledge-base/risks/sharp-left-turn.mdx",
"title": "Sharp Left Turn",
"quality": 69,
"readerImportance": 56.5,
"researchImportance": 89,
"tacticalValue": null,
"contentFormat": "article",
"causalLevel": "pathway",
"lastUpdated": "2026-01-29",
"dateCreated": "2026-02-15",
"summary": "The Sharp Left Turn hypothesis proposes AI capabilities may generalize discontinuously while alignment fails to transfer, with compound probability estimated at 15-40% by 2027-2035. Empirical evidence includes 78% alignment faking rate in Claude 3 Opus under RL pressure and goal misgeneralization in current systems, though catastrophic failures haven't yet occurred in deployed models.",
"description": "The Sharp Left Turn hypothesis proposes that AI capabilities may generalize discontinuously to new domains while alignment properties fail to transfer, creating catastrophic misalignment risk.",
"ratings": {
"novelty": 5.8,
"rigor": 7.2,
"completeness": 7.8,
"actionability": 6.3
},
"category": "risks",
"subcategory": "accident",
"clusters": [
"ai-safety",
"governance"
],
"metrics": {
"wordCount": 4303,
"tableCount": 13,
"diagramCount": 2,
"internalLinks": 35,
"externalLinks": 36,
"footnoteCount": 0,
"bulletRatio": 0.11,
"sectionCount": 32,
"hasOverview": true,
"structuralScore": 15
},
"suggestedQuality": 100,
"updateFrequency": 45,
"evergreen": true,
"wordCount": 4303,
"unconvertedLinks": [
{
"text": "Alignment Faking (Anthropic/Redwood)",
"url": "https://www.anthropic.com/research/alignment-faking",
"resourceId": "c2cfd72baafd64a9",
"resourceTitle": "Anthropic's 2024 alignment faking study"
},
{
"text": "Goal Misgeneralization (ICML)",
"url": "https://proceedings.mlr.press/v162/langosco22a.html",
"resourceId": "c4dda1bfea152190",
"resourceTitle": "Langosco et al. (2022)"
},
{
"text": "Emergent Abilities (TMLR)",
"url": "https://arxiv.org/abs/2206.07682",
"resourceId": "2d76bc16fcc7825d",
"resourceTitle": "Emergent Abilities"
},
{
"text": "Emergent Mirage (NeurIPS)",
"url": "https://arxiv.org/abs/2304.15004",
"resourceId": "22db72cf2a806d3b",
"resourceTitle": "\"Are Emergent Abilities a Mirage?\""
},
{
"text": "Natural Emergent Misalignment (Anthropic)",
"url": "https://www.anthropic.com/research/emergent-misalignment-reward-hacking",
"resourceId": "7a21b9c5237a8a16",
"resourceTitle": "Natural Emergent Misalignment from Reward Hacking"
},
{
"text": "Nate Soares",
"url": "https://intelligence.org/2022/07/04/a-central-ai-alignment-problem/",
"resourceId": "83ae4cb7d004910a",
"resourceTitle": "A Central AI Alignment Problem: Behavioral Disposition vs. World State Objectives"
},
{
"text": "Victoria Krakovna",
"url": "https://vkrakovna.wordpress.com/2023/12/20/retrospective-on-ai-threat-models/",
"resourceId": "6980863a6d7d16d9",
"resourceTitle": "\"Retrospective on My Posts on AI Threat Models\""
},
{
"text": "Wei et al. (2022)",
"url": "https://arxiv.org/abs/2206.07682",
"resourceId": "2d76bc16fcc7825d",
"resourceTitle": "Emergent Abilities"
},
{
"text": "Schaeffer et al. (2023)",
"url": "https://arxiv.org/abs/2304.15004",
"resourceId": "22db72cf2a806d3b",
"resourceTitle": "\"Are Emergent Abilities a Mirage?\""
},
{
"text": "75-point improvement over GPT-3.5",
"url": "https://openai.com/research/gpt-4",
"resourceId": "9b255e0255d7dd86",
"resourceTitle": "GPT-4 Technical Report and Research Overview"
},
{
"text": "OpenAI system cards",
"url": "https://openai.com/research",
"resourceId": "e9aaa7b5e18f9f41",
"resourceTitle": "OpenAI: Model Behavior"
},
{
"text": "full paper",
"url": "https://assets.anthropic.com/m/983c85a201a962f/original/Alignment-Faking-in-Large-Language-Models-full-paper.pdf",
"resourceId": "1fb3c217c5e296b6",
"resourceTitle": "alignment faking in 78% of tests"
},
{
"text": "metric choice effects",
"url": "https://arxiv.org/abs/2304.15004",
"resourceId": "22db72cf2a806d3b",
"resourceTitle": "\"Are Emergent Abilities a Mirage?\""
},
{
"text": "Nate Soares (MIRI)",
"url": "https://intelligence.org/2022/07/04/a-central-ai-alignment-problem/",
"resourceId": "83ae4cb7d004910a",
"resourceTitle": "A Central AI Alignment Problem: Behavioral Disposition vs. World State Objectives"
},
{
"text": "Victoria Krakovna (DeepMind)",
"url": "https://vkrakovna.wordpress.com/2023/12/20/retrospective-on-ai-threat-models/",
"resourceId": "6980863a6d7d16d9",
"resourceTitle": "\"Retrospective on My Posts on AI Threat Models\""
},
{
"text": "Holden Karnofsky",
"url": "https://www.cold-takes.com/",
"resourceId": "859ff786a553505f",
"resourceTitle": "Cold Takes – Holden Karnofsky's Blog"
},
{
"text": "Paul Christiano (AISI)",
"url": "https://www.alignmentforum.org/users/paulfchristiano",
"resourceId": "ebb2f8283d5a6014",
"resourceTitle": "Paul Christiano's AI Alignment Research"
},
{
"text": "Anthropic Alignment Science",
"url": "https://alignment.anthropic.com/",
"resourceId": "5a651b8ed18ffeb1",
"resourceTitle": "Anthropic Alignment Science Blog"
},
{
"text": "\"A Central AI Alignment Problem: Capabilities Generalization, and the Sharp Left Turn\"",
"url": "https://intelligence.org/2022/07/04/a-central-ai-alignment-problem/",
"resourceId": "83ae4cb7d004910a",
"resourceTitle": "A Central AI Alignment Problem: Behavioral Disposition vs. World State Objectives"
},
{
"text": "\"Refining the Sharp Left Turn Threat Model\"",
"url": "https://www.lesswrong.com/posts/GNhMPAWcfBCASy8e6/a-central-ai-alignment-problem-capabilities-generalization",
"resourceId": "a7367776fbe8b441",
"resourceTitle": "A central AI alignment problem: capabilities generalization, and the sharp left turn"
},
{
"text": "\"Retrospective on My Posts on AI Threat Models\"",
"url": "https://vkrakovna.wordpress.com/2023/12/20/retrospective-on-ai-threat-models/",
"resourceId": "6980863a6d7d16d9",
"resourceTitle": "\"Retrospective on My Posts on AI Threat Models\""
},
{
"text": "\"Alignment Faking in Large Language Models\"",
"url": "https://www.anthropic.com/research/alignment-faking",
"resourceId": "c2cfd72baafd64a9",
"resourceTitle": "Anthropic's 2024 alignment faking study"
},
{
"text": "Full paper",
"url": "https://assets.anthropic.com/m/983c85a201a962f/original/Alignment-Faking-in-Large-Language-Models-full-paper.pdf",
"resourceId": "1fb3c217c5e296b6",
"resourceTitle": "alignment faking in 78% of tests"
},
{
"text": "\"Natural Emergent Misalignment from Reward Hacking\"",
"url": "https://www.anthropic.com/research/emergent-misalignment-reward-hacking",
"resourceId": "7a21b9c5237a8a16",
"resourceTitle": "Natural Emergent Misalignment from Reward Hacking"
},
{
"text": "\"Goal Misgeneralization in Deep Reinforcement Learning\"",
"url": "https://proceedings.mlr.press/v162/langosco22a.html",
"resourceId": "c4dda1bfea152190",
"resourceTitle": "Langosco et al. (2022)"
},
{
"text": "\"Emergent Abilities of Large Language Models\"",
"url": "https://arxiv.org/abs/2206.07682",
"resourceId": "2d76bc16fcc7825d",
"resourceTitle": "Emergent Abilities"
},
{
"text": "\"Are Emergent Abilities of Large Language Models a Mirage?\"",
"url": "https://arxiv.org/abs/2304.15004",
"resourceId": "22db72cf2a806d3b",
"resourceTitle": "\"Are Emergent Abilities a Mirage?\""
},
{
"text": "\"Risks from Learned Optimization in Advanced Machine Learning Systems\"",
"url": "https://arxiv.org/abs/1906.01820",
"resourceId": "c4858d4ef280d8e6",
"resourceTitle": "Risks from Learned Optimization"
},
{
"text": "\"The Alignment Problem from a Deep Learning Perspective\"",
"url": "https://arxiv.org/abs/2209.00626",
"resourceId": "9124298fbb913c3d",
"resourceTitle": "Gaming RLHF evaluation"
},
{
"text": "\"Scheming AIs: Will AIs Fake Alignment?\"",
"url": "https://arxiv.org/abs/2311.08379",
"resourceId": "ad8b09f4eba993b3",
"resourceTitle": "Carlsmith (2023) - Scheming AIs"
}
],
"unconvertedLinkCount": 30,
"convertedLinkCount": 24,
"backlinkCount": 7,
"hallucinationRisk": {
"level": "medium",
"score": 40,
"factors": [
"no-citations",
"high-rigor"
]
},
"entityType": "risk",
"redundancy": {
"maxSimilarity": 22,
"similarPages": [
{
"id": "goal-misgeneralization",
"title": "Goal Misgeneralization",
"path": "/knowledge-base/risks/goal-misgeneralization/",
"similarity": 22
},
{
"id": "mesa-optimization",
"title": "Mesa-Optimization",
"path": "/knowledge-base/risks/mesa-optimization/",
"similarity": 22
},
{
"id": "instrumental-convergence",
"title": "Instrumental Convergence",
"path": "/knowledge-base/risks/instrumental-convergence/",
"similarity": 21
},
{
"id": "treacherous-turn",
"title": "Treacherous Turn",
"path": "/knowledge-base/risks/treacherous-turn/",
"similarity": 21
},
{
"id": "situational-awareness",
"title": "Situational Awareness",
"path": "/knowledge-base/capabilities/situational-awareness/",
"similarity": 20
}
]
},
"coverage": {
"passing": 8,
"total": 13,
"targets": {
"tables": 17,
"diagrams": 2,
"internalLinks": 34,
"externalLinks": 22,
"footnotes": 13,
"references": 13
},
"actuals": {
"tables": 13,
"diagrams": 2,
"internalLinks": 35,
"externalLinks": 36,
"footnotes": 0,
"references": 31,
"quotesWithQuotes": 0,
"quotesTotal": 0,
"accuracyChecked": 0,
"accuracyTotal": 0
},
"items": {
"summary": "green",
"schedule": "green",
"entity": "green",
"editHistory": "red",
"overview": "green",
"tables": "amber",
"diagrams": "green",
"internalLinks": "green",
"externalLinks": "green",
"footnotes": "red",
"references": "green",
"quotes": "red",
"accuracy": "red"
},
"ratingsString": "N:5.8 R:7.2 A:6.3 C:7.8"
},
"readerRank": 255,
"researchRank": 33,
"recommendedScore": 179.77
}
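One plausible reading of the coverage.items block is that each actual is graded against its target with simple ratio thresholds. The cutoffs below reproduce this record's statuses (13/17 tables is amber, 0/13 footnotes is red, anything at or above target is green), but they are inferred guesses, not the site's documented rubric.

```typescript
// Hypothetical coverage grading: compare an actual count to its target.
// Thresholds are assumptions chosen to reproduce this record's statuses.
type Status = "green" | "amber" | "red";

function gradeCoverage(actual: number, target: number): Status {
  if (target <= 0) return "green"; // nothing required
  const ratio = actual / target;
  if (ratio >= 1) return "green";    // met or beat target (35/34 internal links)
  if (ratio >= 0.75) return "amber"; // close (13/17 tables is about 0.76)
  return "red";                      // well short (0/13 footnotes)
}
```

External Links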
{
"lesswrong": "https://www.lesswrong.com/tag/sharp-left-turn",
"stampy": "https://aisafety.info/questions/9KE6/What-is-the-sharp-left-turn"
}
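The backlink list is presumably built by inverting every page's outgoing internal links; here is a minimal sketch under that assumption (the PageStub shape and function name are illustrative, not the site's actual code).

```typescript
// Hypothetical backlink collection: find every page whose internal links
// point at the target path.
interface PageStub {
  id: string;
  title: string;
  internalLinks: string[]; // outgoing link paths
}

function collectBacklinks(pages: PageStub[], targetPath: string): PageStub[] {
  return pages.filter((page) => page.internalLinks.includes(targetPath));
}

// collectBacklinks(allPages, "/knowledge-base/risks/sharp-left-turn/")
// would yield the seven pages listed below.
```

Backlinks (7)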
| id | title | type | relationship |
|---|---|---|---|
| miri | Machine Intelligence Research Institute (MIRI) | organization | — |
| eliezer-yudkowsky | Eliezer Yudkowsky | person | — |
| emergent-capabilities | Emergent Capabilities | risk | — |
| alignment-robustness-trajectory | Alignment Robustness Trajectory | analysis | — |
| goal-misgeneralization-probability | Goal Misgeneralization Probability Model | analysis | — |
| agent-foundations | Agent Foundations | approach | — |
| accident-overview | Accident Risks (Overview) | concept | — |
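The redundancy block earlier in the record scores overlap with sibling pages on a 0-100 scale. The record does not say how those scores are computed; cosine similarity over page embeddings or term vectors is one common approach, sketched here purely as an assumption.

```typescript
// Hypothetical redundancy scoring: cosine similarity between numeric page
// vectors (embeddings or TF-IDF), scaled to the record's 0-100 range.
function similarityScore(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const cosine = dot / (Math.sqrt(normA) * Math.sqrt(normB));
  return Math.round(cosine * 100); // e.g. 22 for goal-misgeneralization
}
```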