METR
ID: metr
Type: organization
Path: /knowledge-base/organizations/metr/
Entity ID (EID): E201

Page Record
database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time
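A minimal sketch of how such a merge might be implemented (hypothetical TypeScript; `buildPageRecord`, its parameter names, and the override order are assumptions, not the project's actual build code). The record this produces for the METR page follows below.

```typescript
// Hypothetical sketch of the build-time merge, not the project's actual code.
// The inputs stand in for the three sources named above: MDX frontmatter,
// Entity YAML, and metrics computed during the build.

interface PageRecord {
  id: string;
  wikiId: string;
  path: string;
  title: string;
  [key: string]: unknown; // quality, ratings, metrics, coverage, ranks, ...
}

function buildPageRecord(
  frontmatter: Record<string, unknown>,     // parsed from metr.mdx
  entityYaml: Record<string, unknown>,      // parsed from the Entity YAML file
  computedMetrics: Record<string, unknown>  // word counts, link counts, coverage, ranks
): PageRecord {
  // Assumed precedence: later sources overwrite earlier ones on key collisions,
  // so computed metrics win over entity metadata, which wins over frontmatter.
  return { ...frontmatter, ...entityYaml, ...computedMetrics } as PageRecord;
}
```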
{
  "id": "metr",
  "wikiId": "E201",
  "path": "/knowledge-base/organizations/metr/",
  "filePath": "knowledge-base/organizations/metr.mdx",
  "title": "METR",
  "quality": 66,
  "readerImportance": 83.5,
  "researchImportance": 50.5,
  "tacticalValue": 80,
  "contentFormat": "article",
  "causalLevel": null,
  "lastUpdated": "2026-01-29",
  "dateCreated": "2026-02-15",
  "summary": "METR conducts pre-deployment dangerous capability evaluations for frontier AI labs (OpenAI, Anthropic, Google DeepMind), testing autonomous replication, cybersecurity, CBRN, and manipulation capabilities using a 77-task suite. Their research shows task completion time horizons doubling every 7 months (accelerating to 4 months in 2024-2025), with GPT-5 achieving 2h17m 50%-time horizon; no models yet capable of autonomous replication but gap narrowing rapidly.",
  "description": "Model Evaluation and Threat Research conducts dangerous capability evaluations for frontier AI models, testing for autonomous replication, cybersecurity, CBRN, and manipulation capabilities.",
  "ratings": {
    "novelty": 4.5,
    "rigor": 6.5,
    "completeness": 7.5,
    "actionability": 7
  },
  "category": "organizations",
  "subcategory": "safety-orgs",
  "clusters": [
    "ai-safety",
    "community",
    "governance"
  ],
  "metrics": {
    "wordCount": 4386,
    "tableCount": 8,
    "diagramCount": 1,
    "internalLinks": 45,
    "externalLinks": 10,
    "footnoteCount": 112,
    "bulletRatio": 0.06,
    "sectionCount": 29,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 21,
  "evergreen": true,
  "wordCount": 4386,
  "unconvertedLinks": [
    {
      "text": "time horizons paper",
      "url": "https://arxiv.org/abs/2503.14499",
      "resourceId": "ddd93038c44fbd36",
      "resourceTitle": "[2503.14499] Measuring AI Ability to Complete Long Software Tasks"
    },
    {
      "text": "March 2025 research",
      "url": "https://metr.org/blog/2025-03-19-measuring-ai-ability-to-complete-long-tasks/",
      "resourceId": "271fc5f73a8304b2",
      "resourceTitle": "Measuring AI Ability to Complete Long Tasks - METR"
    },
    {
      "text": "December 2025 analysis",
      "url": "https://metr.org/blog/2025-12-09-common-elements-of-frontier-ai-safety-policies/",
      "resourceId": "c8782940b880d00f",
      "resourceTitle": "METR's analysis of 12 companies"
    },
    {
      "text": "UK AI Safety Institute Frontier AI Trends Report",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    }
  ],
  "unconvertedLinkCount": 4,
  "convertedLinkCount": 23,
  "backlinkCount": 70,
  "hallucinationRisk": {
    "level": "medium",
    "score": 45,
    "factors": [
      "biographical-claims",
      "well-cited"
    ]
  },
  "entityType": "organization",
  "redundancy": {
    "maxSimilarity": 23,
    "similarPages": [
      {
        "id": "ai-safety-institutes",
        "title": "AI Safety Institutes",
        "path": "/knowledge-base/responses/ai-safety-institutes/",
        "similarity": 23
      },
      {
        "id": "us-aisi",
        "title": "US AI Safety Institute (now CAISI)",
        "path": "/knowledge-base/organizations/us-aisi/",
        "similarity": 22
      },
      {
        "id": "scalable-oversight",
        "title": "Scalable Oversight",
        "path": "/knowledge-base/responses/scalable-oversight/",
        "similarity": 21
      },
      {
        "id": "voluntary-commitments",
        "title": "Voluntary Industry Commitments",
        "path": "/knowledge-base/responses/voluntary-commitments/",
        "similarity": 21
      },
      {
        "id": "agentic-ai",
        "title": "Agentic AI",
        "path": "/knowledge-base/capabilities/agentic-ai/",
        "similarity": 20
      }
    ]
  },
  "changeHistory": [
    {
      "date": "2026-02-18",
      "branch": "claude/fix-issue-240-N5irU",
      "title": "Surface tacticalValue in /wiki table and score 53 pages",
      "summary": "Added `tacticalValue` to `ExploreItem` interface, `getExploreItems()` mappings, the `/wiki` explore table (new sortable \"Tact.\" column), and the card view sort dropdown. Scored 49 new pages with tactical values (4 were already scored), bringing total to 53.",
      "model": "sonnet-4",
      "duration": "~30min"
    }
  ],
  "coverage": {
    "passing": 8,
    "total": 13,
    "targets": {
      "tables": 18,
      "diagrams": 2,
      "internalLinks": 35,
      "externalLinks": 22,
      "footnotes": 13,
      "references": 13
    },
    "actuals": {
      "tables": 8,
      "diagrams": 1,
      "internalLinks": 45,
      "externalLinks": 10,
      "footnotes": 112,
      "references": 18,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "summary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "green",
      "overview": "green",
      "tables": "amber",
      "diagrams": "amber",
      "internalLinks": "green",
      "externalLinks": "amber",
      "footnotes": "green",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "editHistoryCount": 1,
    "ratingsString": "N:4.5 R:6.5 A:7 C:7.5"
  },
  "readerRank": 63,
  "researchRank": 276,
  "recommendedScore": 187.27
}

External Links
{
  "eaForum": "https://forum.effectivealtruism.org/topics/metr"
}

Backlinks (70)
| id | title | type | relationship |
|---|---|---|---|
| capability-evaluations | Capability Evaluations | concept | — |
| apollo-research | Apollo Research | organization | — |
| far-ai | FAR AI | organization | — |
| uk-aisi | UK AI Safety Institute | organization | — |
| us-aisi | US AI Safety Institute (now CAISI) | organization | — |
| arc-evals | ARC Evaluations | organization | — |
| astralis-foundation | Astralis Foundation | organization | leads-to |
| ajeya-cotra | Ajeya Cotra | person | — |
| evals | AI Evaluations | research-area | research |
| scalable-eval-approaches | Scalable Eval Approaches | approach | — |
| dangerous-cap-evals | Dangerous Capability Evaluations | approach | — |
| capability-elicitation | Capability Elicitation | approach | — |
| evaluation | AI Evaluation | approach | — |
| model-auditing | Third-Party Model Auditing | approach | — |
| evals-governance | Evals-Based Deployment Gates | approach | — |
| rsp | Responsible Scaling Policies | approach | — |
| training-programs | AI Safety Training Programs | approach | — |
| sandboxing | Sandboxing / Containment | approach | — |
| tool-restrictions | Tool-Use Restrictions | approach | — |
| coding | Autonomous Coding | capability | — |
| large-language-models | Large Language Models | concept | — |
| long-horizon | Long-Horizon Autonomous Tasks | capability | — |
| self-improvement | Self-Improvement and Recursive Enhancement | capability | — |
| situational-awareness | Situational Awareness | capability | — |
| accident-risks | AI Accident Risk Cruxes | crux | — |
| solutions | AI Safety Solution Cruxes | crux | — |
| is-ai-xrisk-real | Is AI Existential Risk Real? | crux | — |
| ai-compute-scaling-metrics | AI Compute Scaling Metrics | analysis | — |
| ai-timelines | AI Timelines | concept | — |
| bioweapons-ai-uplift | AI Uplift Assessment Model | analysis | — |
| capability-alignment-race | Capability-Alignment Race Model | analysis | — |
| intervention-effectiveness-matrix | Intervention Effectiveness Matrix | analysis | — |
| model-organisms-of-misalignment | Model Organisms of Misalignment | analysis | — |
| risk-activation-timeline | Risk Activation Timeline Model | analysis | — |
| risk-interaction-network | Risk Interaction Network | analysis | — |
| safety-spending-at-scale | Safety Spending at Scale | analysis | — |
| ada-lovelace-institute | Ada Lovelace Institute | organization | — |
| ai-futures-project | AI Futures Project | organization | — |
| apart-research | Apart Research | organization | — |
| arc | Alignment Research Center (ARC) | organization | — |
| cais | Center for AI Safety (CAIS) | organization | — |
| ea-funding-absorption-capacity | EA Funding Absorption Capacity | concept | — |
| ea-global | EA Global | organization | — |
| ftx-collapse-ea-funding-lessons | FTX Collapse: Lessons for EA Funding Resilience | concept | — |
| funders-overview | Longtermist Funders (Overview) | concept | — |
| government-orgs-overview | Government AI Safety Organizations (Overview) | concept | — |
| __index__/knowledge-base/organizations | Organizations | concept | — |
| safety-orgs-overview | AI Safety Organizations (Overview) | concept | — |
| sff | Survival and Flourishing Fund (SFF) | organization | — |
| the-foundation-layer | The Foundation Layer | organization | — |
| dario-amodei | Dario Amodei | person | — |
| david-dalrymple | David Dalrymple | person | — |
| dustin-moskovitz | Dustin Moskovitz | person | — |
| jaan-tallinn | Jaan Tallinn | person | — |
| alignment | AI Alignment | approach | — |
| constitutional-ai | Constitutional AI | approach | — |
| coordination-tech | AI Governance Coordination Technologies | approach | — |
| corporate | Corporate AI Safety Responses | approach | — |
| eval-saturation | Eval Saturation & The Evals Gap | approach | — |
| red-teaming | Red Teaming | research-area | — |
| scheming-detection | Scheming & Deception Detection | approach | — |
| technical-research | Technical AI Safety Research | crux | — |
| trump-eo-14179 | Executive Order 14179: Removing Barriers to American Leadership in AI | policy | — |
| deceptive-alignment | Deceptive Alignment | risk | — |
| emergent-capabilities | Emergent Capabilities | risk | — |
| enfeeblement | AI-Induced Enfeeblement | risk | — |
| existential-risk | Existential Risk from AI | concept | — |
| reward-hacking | Reward Hacking | risk | — |
| scheming | Scheming | risk | — |
| trust-cascade | AI Trust Cascade Failure | risk | — |