Longterm Wiki

METR

metr · organization · Path: /knowledge-base/organizations/metr/
Entity ID (EID): E201
70 backlinks · Quality: 66 · Updated: 2026-01-29
Page Record
database.json — merged from MDX frontmatter + Entity YAML + computed metrics at build time (a sketch of this merge follows the record below)
```json
{
  "id": "metr",
  "wikiId": "E201",
  "path": "/knowledge-base/organizations/metr/",
  "filePath": "knowledge-base/organizations/metr.mdx",
  "title": "METR",
  "quality": 66,
  "readerImportance": 83.5,
  "researchImportance": 50.5,
  "tacticalValue": 80,
  "contentFormat": "article",
  "causalLevel": null,
  "lastUpdated": "2026-01-29",
  "dateCreated": "2026-02-15",
  "summary": "METR conducts pre-deployment dangerous capability evaluations for frontier AI labs (OpenAI, Anthropic, Google DeepMind), testing autonomous replication, cybersecurity, CBRN, and manipulation capabilities using a 77-task suite. Their research shows task completion time horizons doubling every 7 months (accelerating to 4 months in 2024-2025), with GPT-5 achieving 2h17m 50%-time horizon; no models yet capable of autonomous replication but gap narrowing rapidly.",
  "description": "Model Evaluation and Threat Research conducts dangerous capability evaluations for frontier AI models, testing for autonomous replication, cybersecurity, CBRN, and manipulation capabilities.",
  "ratings": {
    "novelty": 4.5,
    "rigor": 6.5,
    "completeness": 7.5,
    "actionability": 7
  },
  "category": "organizations",
  "subcategory": "safety-orgs",
  "clusters": [
    "ai-safety",
    "community",
    "governance"
  ],
  "metrics": {
    "wordCount": 4386,
    "tableCount": 8,
    "diagramCount": 1,
    "internalLinks": 45,
    "externalLinks": 10,
    "footnoteCount": 112,
    "bulletRatio": 0.06,
    "sectionCount": 29,
    "hasOverview": true,
    "structuralScore": 15
  },
  "suggestedQuality": 100,
  "updateFrequency": 21,
  "evergreen": true,
  "wordCount": 4386,
  "unconvertedLinks": [
    {
      "text": "time horizons paper",
      "url": "https://arxiv.org/abs/2503.14499",
      "resourceId": "ddd93038c44fbd36",
      "resourceTitle": "[2503.14499] Measuring AI Ability to Complete Long Software Tasks"
    },
    {
      "text": "March 2025 research",
      "url": "https://metr.org/blog/2025-03-19-measuring-ai-ability-to-complete-long-tasks/",
      "resourceId": "271fc5f73a8304b2",
      "resourceTitle": "Measuring AI Ability to Complete Long Tasks - METR"
    },
    {
      "text": "December 2025 analysis",
      "url": "https://metr.org/blog/2025-12-09-common-elements-of-frontier-ai-safety-policies/",
      "resourceId": "c8782940b880d00f",
      "resourceTitle": "METR's analysis of 12 companies"
    },
    {
      "text": "UK AI Safety Institute Frontier AI Trends Report",
      "url": "https://www.aisi.gov.uk/frontier-ai-trends-report",
      "resourceId": "7042c7f8de04ccb1",
      "resourceTitle": "AISI Frontier AI Trends"
    }
  ],
  "unconvertedLinkCount": 4,
  "convertedLinkCount": 23,
  "backlinkCount": 70,
  "hallucinationRisk": {
    "level": "medium",
    "score": 45,
    "factors": [
      "biographical-claims",
      "well-cited"
    ]
  },
  "entityType": "organization",
  "redundancy": {
    "maxSimilarity": 23,
    "similarPages": [
      {
        "id": "ai-safety-institutes",
        "title": "AI Safety Institutes",
        "path": "/knowledge-base/responses/ai-safety-institutes/",
        "similarity": 23
      },
      {
        "id": "us-aisi",
        "title": "US AI Safety Institute (now CAISI)",
        "path": "/knowledge-base/organizations/us-aisi/",
        "similarity": 22
      },
      {
        "id": "scalable-oversight",
        "title": "Scalable Oversight",
        "path": "/knowledge-base/responses/scalable-oversight/",
        "similarity": 21
      },
      {
        "id": "voluntary-commitments",
        "title": "Voluntary Industry Commitments",
        "path": "/knowledge-base/responses/voluntary-commitments/",
        "similarity": 21
      },
      {
        "id": "agentic-ai",
        "title": "Agentic AI",
        "path": "/knowledge-base/capabilities/agentic-ai/",
        "similarity": 20
      }
    ]
  },
  "changeHistory": [
    {
      "date": "2026-02-18",
      "branch": "claude/fix-issue-240-N5irU",
      "title": "Surface tacticalValue in /wiki table and score 53 pages",
      "summary": "Added `tacticalValue` to `ExploreItem` interface, `getExploreItems()` mappings, the `/wiki` explore table (new sortable \"Tact.\" column), and the card view sort dropdown. Scored 49 new pages with tactical values (4 were already scored), bringing total to 53.",
      "model": "sonnet-4",
      "duration": "~30min"
    }
  ],
  "coverage": {
    "passing": 8,
    "total": 13,
    "targets": {
      "tables": 18,
      "diagrams": 2,
      "internalLinks": 35,
      "externalLinks": 22,
      "footnotes": 13,
      "references": 13
    },
    "actuals": {
      "tables": 8,
      "diagrams": 1,
      "internalLinks": 45,
      "externalLinks": 10,
      "footnotes": 112,
      "references": 18,
      "quotesWithQuotes": 0,
      "quotesTotal": 0,
      "accuracyChecked": 0,
      "accuracyTotal": 0
    },
    "items": {
      "summary": "green",
      "schedule": "green",
      "entity": "green",
      "editHistory": "green",
      "overview": "green",
      "tables": "amber",
      "diagrams": "amber",
      "internalLinks": "green",
      "externalLinks": "amber",
      "footnotes": "green",
      "references": "green",
      "quotes": "red",
      "accuracy": "red"
    },
    "editHistoryCount": 1,
    "ratingsString": "N:4.5 R:6.5 A:7 C:7.5"
  },
  "readerRank": 63,
  "researchRank": 276,
  "recommendedScore": 187.27
}
```
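Below is a minimal TypeScript sketch of the build-time merge described above. Every name in it (`mergePageRecord`, `coverageStatus`, the merge precedence, the 100%/40% thresholds) is an illustrative assumption rather than the wiki's actual code; the thresholds are guessed only so as to reproduce the coverage statuses in this record (tables 8/18 gives amber, internalLinks 45/35 gives green, quotes 0/0 gives red).

```ts
// Hypothetical sketch only: helper names, merge precedence, and
// thresholds are assumptions, not the wiki's real implementation.
type Json = Record<string, unknown>;
type CoverageStatus = "green" | "amber" | "red";

// Merge the three sources named in the heading above. Assumed precedence:
// entity YAML overrides MDX frontmatter on key collisions; computed
// metrics are nested under their own "metrics" key, as in the record.
function mergePageRecord(frontmatter: Json, entityYaml: Json, metrics: Json): Json {
  return { ...frontmatter, ...entityYaml, metrics };
}

// Guessed status rule that happens to fit this record: meeting the
// target is green, reaching at least 40% of it is amber, anything
// below that (or a zero target with nothing present) is red.
function coverageStatus(actual: number, target: number): CoverageStatus {
  if (target === 0) return actual > 0 ? "green" : "red";
  const ratio = actual / target;
  if (ratio >= 1) return "green";
  if (ratio >= 0.4) return "amber";
  return "red";
}

// Usage against values from the record above:
const record = mergePageRecord(
  { id: "metr", title: "METR", quality: 66 },     // from metr.mdx frontmatter
  { wikiId: "E201", entityType: "organization" }, // from the entity YAML
  { wordCount: 4386, tableCount: 8 },             // computed at build time
);
console.log(record.wikiId);          // "E201"
console.log(coverageStatus(8, 18));  // "amber" (tables)
console.log(coverageStatus(45, 35)); // "green" (internalLinks)
console.log(coverageStatus(0, 0));   // "red" (quotes)
```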
External Links
```json
{
  "eaForum": "https://forum.effectivealtruism.org/topics/metr"
}
```
Backlinks (70)
| id | title | type | relationship |
| --- | --- | --- | --- |
| capability-evaluations | Capability Evaluations | concept | |
| apollo-research | Apollo Research | organization | |
| far-ai | FAR AI | organization | |
| uk-aisi | UK AI Safety Institute | organization | |
| us-aisi | US AI Safety Institute (now CAISI) | organization | |
| arc-evals | ARC Evaluations | organization | |
| astralis-foundation | Astralis Foundation | organization | leads-to |
| ajeya-cotra | Ajeya Cotra | person | |
| evals | AI Evaluations | research-area | research |
| scalable-eval-approaches | Scalable Eval Approaches | approach | |
| dangerous-cap-evals | Dangerous Capability Evaluations | approach | |
| capability-elicitation | Capability Elicitation | approach | |
| evaluation | AI Evaluation | approach | |
| model-auditing | Third-Party Model Auditing | approach | |
| evals-governance | Evals-Based Deployment Gates | approach | |
| rsp | Responsible Scaling Policies | approach | |
| training-programs | AI Safety Training Programs | approach | |
| sandboxing | Sandboxing / Containment | approach | |
| tool-restrictions | Tool-Use Restrictions | approach | |
| coding | Autonomous Coding | capability | |
| large-language-models | Large Language Models | concept | |
| long-horizon | Long-Horizon Autonomous Tasks | capability | |
| self-improvement | Self-Improvement and Recursive Enhancement | capability | |
| situational-awareness | Situational Awareness | capability | |
| accident-risks | AI Accident Risk Cruxes | crux | |
| solutions | AI Safety Solution Cruxes | crux | |
| is-ai-xrisk-real | Is AI Existential Risk Real? | crux | |
| ai-compute-scaling-metrics | AI Compute Scaling Metrics | analysis | |
| ai-timelines | AI Timelines | concept | |
| bioweapons-ai-uplift | AI Uplift Assessment Model | analysis | |
| capability-alignment-race | Capability-Alignment Race Model | analysis | |
| intervention-effectiveness-matrix | Intervention Effectiveness Matrix | analysis | |
| model-organisms-of-misalignment | Model Organisms of Misalignment | analysis | |
| risk-activation-timeline | Risk Activation Timeline Model | analysis | |
| risk-interaction-network | Risk Interaction Network | analysis | |
| safety-spending-at-scale | Safety Spending at Scale | analysis | |
| ada-lovelace-institute | Ada Lovelace Institute | organization | |
| ai-futures-project | AI Futures Project | organization | |
| apart-research | Apart Research | organization | |
| arc | Alignment Research Center (ARC) | organization | |
| cais | Center for AI Safety (CAIS) | organization | |
| ea-funding-absorption-capacity | EA Funding Absorption Capacity | concept | |
| ea-global | EA Global | organization | |
| ftx-collapse-ea-funding-lessons | FTX Collapse: Lessons for EA Funding Resilience | concept | |
| funders-overview | Longtermist Funders (Overview) | concept | |
| government-orgs-overview | Government AI Safety Organizations (Overview) | concept | |
| __index__/knowledge-base/organizations | Organizations | concept | |
| safety-orgs-overview | AI Safety Organizations (Overview) | concept | |
| sff | Survival and Flourishing Fund (SFF) | organization | |
| the-foundation-layer | The Foundation Layer | organization | |
| dario-amodei | Dario Amodei | person | |
| david-dalrymple | David Dalrymple | person | |
| dustin-moskovitz | Dustin Moskovitz | person | |
| jaan-tallinn | Jaan Tallinn | person | |
| alignment | AI Alignment | approach | |
| constitutional-ai | Constitutional AI | approach | |
| coordination-tech | AI Governance Coordination Technologies | approach | |
| corporate | Corporate AI Safety Responses | approach | |
| eval-saturation | Eval Saturation & The Evals Gap | approach | |
| red-teaming | Red Teaming | research-area | |
| scheming-detection | Scheming & Deception Detection | approach | |
| technical-research | Technical AI Safety Research | crux | |
| trump-eo-14179 | Executive Order 14179: Removing Barriers to American Leadership in AI | policy | |
| deceptive-alignment | Deceptive Alignment | risk | |
| emergent-capabilities | Emergent Capabilities | risk | |
| enfeeblement | AI-Induced Enfeeblement | risk | |
| existential-risk | Existential Risk from AI | concept | |
| reward-hacking | Reward Hacking | risk | |
| scheming | Scheming | risk | |
| trust-cascade | AI Trust Cascade Failure | risk | |
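Each backlink row pairs a page id and title with an entity type and an optional relationship tag (only `astralis-foundation` and `evals` carry one here). A hypothetical sketch of the row shape and the kind of type-grouping helper such a view might use; the `Backlink` interface and `groupByType` are illustrative assumptions, not the wiki's actual code:

```ts
// Hypothetical row shape for the table above; not the wiki's real types.
interface Backlink {
  id: string;
  title: string;
  type: string;           // e.g. "organization", "approach", "risk"
  relationship?: string;  // e.g. "leads-to", "research" (usually absent)
}

// Group backlinks by entity type for display, preserving input order.
function groupByType(links: Backlink[]): Map<string, Backlink[]> {
  const groups = new Map<string, Backlink[]>();
  for (const link of links) {
    const bucket = groups.get(link.type) ?? [];
    bucket.push(link);
    groups.set(link.type, bucket);
  }
  return groups;
}

// Two rows taken from the table above:
const backlinks: Backlink[] = [
  { id: "apollo-research", title: "Apollo Research", type: "organization" },
  { id: "evals", title: "AI Evaluations", type: "research-area", relationship: "research" },
];
console.log(groupByType(backlinks).get("organization")?.length); // 1
```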