Sharp Left Turn
sharp-left-turn · risk · Path: /knowledge-base/risks/sharp-left-turn/
Entity ID (EID): E281
Page Record (database.json) — merged from MDX frontmatter + Entity YAML + computed metrics at build time
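The merge described above can be pictured with a short build-time sketch. This is an illustration only: `gray-matter` is one common frontmatter parser, and `buildPageRecord` plus its inputs are hypothetical names, not this site's actual build code.

```typescript
// Hypothetical build step: merge MDX frontmatter, entity YAML, and
// computed metrics into one page record. Later sources win on collisions.
import { readFileSync } from "node:fs";
import matter from "gray-matter"; // common frontmatter parser (assumption)

interface PageRecord {
  id: string;
  wikiId: string;
  path: string;
  [key: string]: unknown; // remaining merged fields
}

function buildPageRecord(
  mdxPath: string,
  entityYaml: Record<string, unknown>
): PageRecord {
  const { data: frontmatter, content } = matter(readFileSync(mdxPath, "utf8"));

  // One computed metric shown; tableCount, internalLinks, etc. would be
  // derived from `content` in the same way.
  const wordCount = content.split(/\s+/).filter(Boolean).length;

  return {
    ...frontmatter, // MDX frontmatter first
    ...entityYaml,  // entity YAML overrides
    filePath: mdxPath,
    metrics: { wordCount }, // computed at build time
  } as PageRecord;
}
```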
{
"id": "sharp-left-turn",
"wikiId": "E281",
"path": "/knowledge-base/risks/sharp-left-turn/",
"filePath": "knowledge-base/risks/sharp-left-turn.mdx",
"title": "Sharp Left Turn",
"quality": 69,
"readerImportance": 56.5,
"researchImportance": 89,
"tacticalValue": null,
"contentFormat": "article",
"causalLevel": "pathway",
"lastUpdated": "2026-01-29",
"dateCreated": "2026-02-15",
"summary": "The Sharp Left Turn hypothesis proposes AI capabilities may generalize discontinuously while alignment fails to transfer, with compound probability estimated at 15-40% by 2027-2035. Empirical evidence includes 78% alignment faking rate in Claude 3 Opus under RL pressure and goal misgeneralization in current systems, though catastrophic failures haven't yet occurred in deployed models.",
"description": "The Sharp Left Turn hypothesis proposes that AI capabilities may generalize discontinuously to new domains while alignment properties fail to transfer, creating catastrophic misalignment risk.",
"ratings": {
"novelty": 5.8,
"rigor": 7.2,
"completeness": 7.8,
"actionability": 6.3
},
"category": "risks",
"subcategory": "accident",
"clusters": [
"ai-safety",
"governance"
],
"metrics": {
"wordCount": 4303,
"tableCount": 13,
"diagramCount": 2,
"internalLinks": 35,
"externalLinks": 36,
"footnoteCount": 0,
"bulletRatio": 0.11,
"sectionCount": 32,
"hasOverview": true,
"structuralScore": 15
},
"suggestedQuality": 100,
"updateFrequency": 45,
"evergreen": true,
"wordCount": 4303,
"unconvertedLinks": [
{
"text": "Alignment Faking (Anthropic/Redwood)",
"url": "https://www.anthropic.com/research/alignment-faking",
"resourceId": "c2cfd72baafd64a9",
"resourceTitle": "Anthropic's 2024 alignment faking study"
},
{
"text": "Goal Misgeneralization (ICML)",
"url": "https://proceedings.mlr.press/v162/langosco22a.html",
"resourceId": "c4dda1bfea152190",
"resourceTitle": "Langosco et al. (2022)"
},
{
"text": "Emergent Abilities (TMLR)",
"url": "https://arxiv.org/abs/2206.07682",
"resourceId": "2d76bc16fcc7825d",
"resourceTitle": "Emergent Abilities"
},
{
"text": "Emergent Mirage (NeurIPS)",
"url": "https://arxiv.org/abs/2304.15004",
"resourceId": "22db72cf2a806d3b",
"resourceTitle": "\"Are Emergent Abilities a Mirage?\""
},
{
"text": "Natural Emergent Misalignment (Anthropic)",
"url": "https://www.anthropic.com/research/emergent-misalignment-reward-hacking",
"resourceId": "7a21b9c5237a8a16",
"resourceTitle": "Natural Emergent Misalignment from Reward Hacking"
},
{
"text": "Nate Soares",
"url": "https://intelligence.org/2022/07/04/a-central-ai-alignment-problem/",
"resourceId": "83ae4cb7d004910a",
"resourceTitle": "A Central AI Alignment Problem: Behavioral Disposition vs. World State Objectives"
},
{
"text": "Victoria Krakovna",
"url": "https://vkrakovna.wordpress.com/2023/12/20/retrospective-on-ai-threat-models/",
"resourceId": "6980863a6d7d16d9",
"resourceTitle": "\"Retrospective on My Posts on AI Threat Models\""
},
{
"text": "Wei et al. (2022)",
"url": "https://arxiv.org/abs/2206.07682",
"resourceId": "2d76bc16fcc7825d",
"resourceTitle": "Emergent Abilities"
},
{
"text": "Schaeffer et al. (2023)",
"url": "https://arxiv.org/abs/2304.15004",
"resourceId": "22db72cf2a806d3b",
"resourceTitle": "\"Are Emergent Abilities a Mirage?\""
},
{
"text": "75-point improvement over GPT-3.5",
"url": "https://openai.com/research/gpt-4",
"resourceId": "9b255e0255d7dd86",
"resourceTitle": "GPT-4 Technical Report and Research Overview"
},
{
"text": "OpenAI system cards",
"url": "https://openai.com/research",
"resourceId": "e9aaa7b5e18f9f41",
"resourceTitle": "OpenAI: Model Behavior"
},
{
"text": "full paper",
"url": "https://assets.anthropic.com/m/983c85a201a962f/original/Alignment-Faking-in-Large-Language-Models-full-paper.pdf",
"resourceId": "1fb3c217c5e296b6",
"resourceTitle": "alignment faking in 78% of tests"
},
{
"text": "metric choice effects",
"url": "https://arxiv.org/abs/2304.15004",
"resourceId": "22db72cf2a806d3b",
"resourceTitle": "\"Are Emergent Abilities a Mirage?\""
},
{
"text": "Nate Soares (MIRI)",
"url": "https://intelligence.org/2022/07/04/a-central-ai-alignment-problem/",
"resourceId": "83ae4cb7d004910a",
"resourceTitle": "A Central AI Alignment Problem: Behavioral Disposition vs. World State Objectives"
},
{
"text": "Victoria Krakovna (DeepMind)",
"url": "https://vkrakovna.wordpress.com/2023/12/20/retrospective-on-ai-threat-models/",
"resourceId": "6980863a6d7d16d9",
"resourceTitle": "\"Retrospective on My Posts on AI Threat Models\""
},
{
"text": "Holden Karnofsky",
"url": "https://www.cold-takes.com/",
"resourceId": "859ff786a553505f",
"resourceTitle": "Cold Takes – Holden Karnofsky's Blog"
},
{
"text": "Paul Christiano (AISI)",
"url": "https://www.alignmentforum.org/users/paulfchristiano",
"resourceId": "ebb2f8283d5a6014",
"resourceTitle": "Paul Christiano's AI Alignment Research"
},
{
"text": "Anthropic Alignment Science",
"url": "https://alignment.anthropic.com/",
"resourceId": "5a651b8ed18ffeb1",
"resourceTitle": "Anthropic Alignment Science Blog"
},
{
"text": "\"A Central AI Alignment Problem: Capabilities Generalization, and the Sharp Left Turn\"",
"url": "https://intelligence.org/2022/07/04/a-central-ai-alignment-problem/",
"resourceId": "83ae4cb7d004910a",
"resourceTitle": "A Central AI Alignment Problem: Behavioral Disposition vs. World State Objectives"
},
{
"text": "\"Refining the Sharp Left Turn Threat Model\"",
"url": "https://www.lesswrong.com/posts/GNhMPAWcfBCASy8e6/a-central-ai-alignment-problem-capabilities-generalization",
"resourceId": "a7367776fbe8b441",
"resourceTitle": "A central AI alignment problem: capabilities generalization, and the sharp left turn"
},
{
"text": "\"Retrospective on My Posts on AI Threat Models\"",
"url": "https://vkrakovna.wordpress.com/2023/12/20/retrospective-on-ai-threat-models/",
"resourceId": "6980863a6d7d16d9",
"resourceTitle": "\"Retrospective on My Posts on AI Threat Models\""
},
{
"text": "\"Alignment Faking in Large Language Models\"",
"url": "https://www.anthropic.com/research/alignment-faking",
"resourceId": "c2cfd72baafd64a9",
"resourceTitle": "Anthropic's 2024 alignment faking study"
},
{
"text": "Full paper",
"url": "https://assets.anthropic.com/m/983c85a201a962f/original/Alignment-Faking-in-Large-Language-Models-full-paper.pdf",
"resourceId": "1fb3c217c5e296b6",
"resourceTitle": "alignment faking in 78% of tests"
},
{
"text": "\"Natural Emergent Misalignment from Reward Hacking\"",
"url": "https://www.anthropic.com/research/emergent-misalignment-reward-hacking",
"resourceId": "7a21b9c5237a8a16",
"resourceTitle": "Natural Emergent Misalignment from Reward Hacking"
},
{
"text": "\"Goal Misgeneralization in Deep Reinforcement Learning\"",
"url": "https://proceedings.mlr.press/v162/langosco22a.html",
"resourceId": "c4dda1bfea152190",
"resourceTitle": "Langosco et al. (2022)"
},
{
"text": "\"Emergent Abilities of Large Language Models\"",
"url": "https://arxiv.org/abs/2206.07682",
"resourceId": "2d76bc16fcc7825d",
"resourceTitle": "Emergent Abilities"
},
{
"text": "\"Are Emergent Abilities of Large Language Models a Mirage?\"",
"url": "https://arxiv.org/abs/2304.15004",
"resourceId": "22db72cf2a806d3b",
"resourceTitle": "\"Are Emergent Abilities a Mirage?\""
},
{
"text": "\"Risks from Learned Optimization in Advanced Machine Learning Systems\"",
"url": "https://arxiv.org/abs/1906.01820",
"resourceId": "c4858d4ef280d8e6",
"resourceTitle": "Risks from Learned Optimization"
},
{
"text": "\"The Alignment Problem from a Deep Learning Perspective\"",
"url": "https://arxiv.org/abs/2209.00626",
"resourceId": "9124298fbb913c3d",
"resourceTitle": "Gaming RLHF evaluation"
},
{
"text": "\"Scheming AIs: Will AIs Fake Alignment?\"",
"url": "https://arxiv.org/abs/2311.08379",
"resourceId": "ad8b09f4eba993b3",
"resourceTitle": "Carlsmith (2023) - Scheming AIs"
}
],
"unconvertedLinkCount": 30,
"convertedLinkCount": 24,
"backlinkCount": 7,
"hallucinationRisk": {
"level": "medium",
"score": 40,
"factors": [
"no-citations",
"high-rigor"
]
},
"entityType": "risk",
"redundancy": {
"maxSimilarity": 22,
"similarPages": [
{
"id": "goal-misgeneralization",
"title": "Goal Misgeneralization",
"path": "/knowledge-base/risks/goal-misgeneralization/",
"similarity": 22
},
{
"id": "mesa-optimization",
"title": "Mesa-Optimization",
"path": "/knowledge-base/risks/mesa-optimization/",
"similarity": 22
},
{
"id": "instrumental-convergence",
"title": "Instrumental Convergence",
"path": "/knowledge-base/risks/instrumental-convergence/",
"similarity": 21
},
{
"id": "treacherous-turn",
"title": "Treacherous Turn",
"path": "/knowledge-base/risks/treacherous-turn/",
"similarity": 21
},
{
"id": "situational-awareness",
"title": "Situational Awareness",
"path": "/knowledge-base/capabilities/situational-awareness/",
"similarity": 20
}
]
},
"coverage": {
"passing": 8,
"total": 13,
"targets": {
"tables": 17,
"diagrams": 2,
"internalLinks": 34,
"externalLinks": 22,
"footnotes": 13,
"references": 13
},
"actuals": {
"tables": 13,
"diagrams": 2,
"internalLinks": 35,
"externalLinks": 36,
"footnotes": 0,
"references": 31,
"quotesWithQuotes": 0,
"quotesTotal": 0,
"accuracyChecked": 0,
"accuracyTotal": 0
},
"items": {
"summary": "green",
"schedule": "green",
"entity": "green",
"editHistory": "red",
"overview": "green",
"tables": "amber",
"diagrams": "green",
"internalLinks": "green",
"externalLinks": "green",
"footnotes": "red",
"references": "green",
"quotes": "red",
"accuracy": "red"
},
"ratingsString": "N:5.8 R:7.2 A:6.3 C:7.8"
},
"readerRank": 255,
"researchRank": 33,
"recommendedScore": 179.77
}
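One plausible reading of the coverage.items block is that each actual is graded against its target with simple ratio thresholds. The cutoffs below reproduce this record's statuses (13/17 tables is amber, 0/13 footnotes is red, anything at or above target is green), but they are inferred guesses, not the site's documented rubric.

```typescript
// Hypothetical coverage grading: compare an actual count to its target.
// Thresholds are assumptions chosen to reproduce this record's statuses.
type Status = "green" | "amber" | "red";

function gradeCoverage(actual: number, target: number): Status {
  if (target <= 0) return "green"; // nothing required
  const ratio = actual / target;
  if (ratio >= 1) return "green";    // met or beat target (35/34 internal links)
  if (ratio >= 0.75) return "amber"; // close (13/17 tables is about 0.76)
  return "red";                      // well short (0/13 footnotes)
}
```

External Links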
{
"lesswrong": "https://www.lesswrong.com/tag/sharp-left-turn",
"stampy": "https://aisafety.info/questions/9KE6/What-is-the-sharp-left-turn"
}
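The backlink list is presumably built by inverting every page's outgoing internal links; here is a minimal sketch under that assumption (the PageStub shape and function name are illustrative, not the site's actual code).

```typescript
// Hypothetical backlink collection: find every page whose internal links
// point at the target path.
interface PageStub {
  id: string;
  title: string;
  internalLinks: string[]; // outgoing link paths
}

function collectBacklinks(pages: PageStub[], targetPath: string): PageStub[] {
  return pages.filter((page) => page.internalLinks.includes(targetPath));
}

// collectBacklinks(allPages, "/knowledge-base/risks/sharp-left-turn/")
// would yield the seven pages listed below.
```

Backlinks (7)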
| id | title | type | relationship |
|---|---|---|---|
| miri | Machine Intelligence Research Institute (MIRI) | organization | — |
| eliezer-yudkowsky | Eliezer Yudkowsky | person | — |
| emergent-capabilities | Emergent Capabilities | risk | — |
| alignment-robustness-trajectory | Alignment Robustness Trajectory | analysis | — |
| goal-misgeneralization-probability | Goal Misgeneralization Probability Model | analysis | — |
| agent-foundations | Agent Foundations | approach | — |
| accident-overview | Accident Risks (Overview) | concept | — |
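The redundancy block earlier in the record scores overlap with sibling pages on a 0-100 scale. The record does not say how those scores are computed; cosine similarity over page embeddings or term vectors is one common approach, sketched here purely as an assumption.

```typescript
// Hypothetical redundancy scoring: cosine similarity between numeric page
// vectors (embeddings or TF-IDF), scaled to the record's 0-100 range.
function similarityScore(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const cosine = dot / (Math.sqrt(normA) * Math.sqrt(normB));
  return Math.round(cosine * 100); // e.g. 22 for goal-misgeneralization
}
```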