5:["$","article",null,{"className":"mx-auto max-w-3xl px-4 py-8 sm:px-6 lg:px-8","children":[["$","div",null,{"className":"mb-8 h-1.5 w-full rounded-full bg-gradient-to-r from-blue-600 to-blue-400"}],["$","div",null,{"className":"mb-8 flex items-center gap-4","children":[["$","a",null,{"href":"/ainews","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-arrow-left size-4","children":[["$","path","1l729n",{"d":"m12 19-7-7 7-7"}],["$","path","x3x0zl",{"d":"M19 12H5"}],"$undefined"]}],"Back to News"],"className":"inline-flex items-center justify-center whitespace-nowrap text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 h-9 rounded-md px-3 gap-2 text-blue-600 hover:bg-blue-50 hover:text-blue-700"}],["$","a",null,{"href":"https://t.me/AI_News_CN/18559?lang=en","target":"_blank","rel":"noopener noreferrer","children":[["$","span",null,{"children":"Read Original"}],["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide lucide-external-link size-4","children":[["$","path","1q9fwt",{"d":"M15 3h6v6"}],["$","path","gplh6r",{"d":"M10 14 21 3"}],["$","path","a6xqqp",{"d":"M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"}],"$undefined"]}]],"className":"inline-flex items-center justify-center whitespace-nowrap text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 border bg-background h-9 rounded-md px-3 gap-2 
border-blue-200 text-blue-600 hover:bg-blue-50 hover:text-blue-700"}]]}],["$","div",null,{"className":"mb-6 flex items-center gap-4 text-sm","children":[["$","time",null,{"dateTime":"2025-04-02T19:29:50+00:00","className":"font-medium text-blue-600","children":"3 months ago"}],["$","span",null,{"className":"rounded-full bg-blue-100 px-3 py-1.5 font-medium text-blue-700 shadow-sm","children":"AI News CN (Telegram) - English Translation"}]]}],["$","div",null,{"className":"relative mb-8","children":[["$","h1",null,{"className":"text-3xl font-bold tracking-tight text-gray-900 sm:text-4xl","children":"🖼 We release PaperBench, a benchmark for evaluating the ability of AI agents to reproduce state-of-the-art AI research, and it is also part of our defense framework. AI agents must reproduce the top papers of ICML 2024,..."}],["$","div",null,{"className":"mt-4 h-1 w-32 rounded-full bg-blue-500"}]]}],["$","div",null,{"className":"rounded-xl border border-blue-100 bg-white p-6 shadow-md","children":["$","div",null,{"className":"prose prose-lg max-w-none prose-headings:text-blue-900 prose-a:text-blue-600 prose-a:no-underline hover:prose-a:underline prose-blockquote:border-blue-500 prose-strong:text-blue-700","dangerouslySetInnerHTML":{"__html":"

We are releasing PaperBench, a benchmark that evaluates the ability of AI agents to reproduce state-of-the-art AI research, as part of our Preparedness Framework.

AI agents must reproduce the top papers from ICML 2024. The tasks cover understanding the papers, writing code, and conducting experiments.

(@OpenAI)

via Teahouse - Telegram Channel

"}}]}],["$","div",null,{"className":"mt-12 flex items-center","children":[["$","div",null,{"className":"h-0.5 flex-grow rounded-full bg-blue-100"}],["$","div",null,{"className":"mx-2 text-blue-400","children":"•••"}],["$","div",null,{"className":"h-0.5 flex-grow rounded-full bg-blue-100"}]]}]]}]