{"id":"https://openalex.org/W4388926373","doi":"https://doi.org/10.48550/arxiv.2311.12022","title":"GPQA: A Graduate-Level Google-Proof Q&amp;A Benchmark","display_name":"GPQA: A Graduate-Level Google-Proof Q&amp;A Benchmark","publication_year":2023,"publication_date":"2023-11-20","ids":{"openalex":"https://openalex.org/W4388926373","doi":"https://doi.org/10.48550/arxiv.2311.12022"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2311.12022","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.12022","pdf_url":"https://arxiv.org/pdf/2311.12022","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2311.12022","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060062241","display_name":"David B. Rein","orcid":"https://orcid.org/0000-0002-1271-5789"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rein, David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056278789","display_name":"Betty Li Hou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hou, Betty Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024678824","display_name":"Asa Cooper Stickland","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stickland, Asa Cooper","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020894677","display_name":"Jackson Petty","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Petty, Jackson","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103936332","display_name":"Richard Yuanzhe Pang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pang, Richard Yuanzhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002006843","display_name":"Julien Dirani","orcid":"https://orcid.org/0000-0002-5632-9863"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dirani, Julien","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072681909","display_name":"Julian Michael","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Michael, Julian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5112713734","display_name":"Samuel R. Bowman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bowman, Samuel R.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5060062241"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9785000085830688,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9771000146865845,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7280190587043762},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.722220778465271},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6317152380943298},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.6226784586906433},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5188499689102173},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5181576013565063},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.5082228183746338},{"id":"https://openalex.org/keywords/frontier","display_name":"Frontier","score":0.4248882532119751},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3797757029533386},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.0887458324432373},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.08731895685195923},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07729554176330566}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7280190587043762},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.722220778465271},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6317152380943298},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.6226784586906433},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5188499689102173},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5181576013565063},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5082228183746338},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.4248882532119751},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3797757029533386},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0887458324432373},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.08731895685195923},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07729554176330566},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2311.12022","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.12022","pdf_url":"https://arxiv.org/pdf/2311.12022","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2311.12022","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2311.12022","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2311.12022","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.12022","pdf_url":"https://arxiv.org/pdf/2311.12022","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4388926373.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2347401120","https://openalex.org/W2261902776","https://openalex.org/W2041961361","https://openalex.org/W2310010941","https://openalex.org/W1988132375","https://openalex.org/W2334292868","https://openalex.org/W579144800","https://openalex.org/W2147233680","https://openalex.org/W2069525434","https://openalex.org/W2046798653"],"abstract_inverted_index":{"We":[0,19],"present":[1],"GPQA,":[2],"a":[3],"challenging":[4],"dataset":[5],"of":[6,149],"448":[7],"multiple-choice":[8],"questions":[9,23,77,81],"written":[10],"by":[11],"domain":[12],"experts":[13,29,49,174],"in":[14,36,51],"biology,":[15],"physics,":[16],"and":[17,26,145,155],"chemistry.":[18],"ensure":[20],"that":[21,127,183],"the":[22,37,48,73,76,140],"are":[24,33,78,82,100,142],"high-quality":[25],"extremely":[27],"difficult:":[28],"who":[30],"have":[31],"or":[32],"pursuing":[34],"PhDs":[35],"corresponding":[38],"domains":[39],"reach":[40,59],"65%":[41],"accuracy":[42],"(74%":[43],"when":[44,115],"discounting":[45],"clear":[46],"mistakes":[47],"identified":[50],"retrospect),":[52],"while":[53],"highly":[54],"skilled":[55,144,153],"non-expert":[56],"validators":[57],"only":[58],"34%":[60],"accuracy,":[61],"despite":[62],"spending":[63],"on":[64],"average":[65],"over":[66],"30":[67],"minutes":[68],"with":[69,89],"unrestricted":[70],"access":[71],"to":[72,101,106,122,130,175],"web":[74],"(i.e.,":[75],"\"Google-proof\").":[79],"The":[80,147],"also":[83],"difficult":[84,137],"for":[85,113,152,172],"state-of-the-art":[86],"AI":[87,104,157,181],"systems,":[88],"our":[90],"strongest":[91],"GPT-4":[92],"based":[93],"baseline":[94],"achieving":[95],"39%":[96],"accuracy.":[97],"If":[98],"we":[99,120,166],"use":[102],"future":[103],"systems":[105,158,182],"help":[107,169],"us":[108],"answer":[109],"very":[110],"hard":[111],"questions,":[112],"example,":[114],"developing":[116],"new":[117],"scientific":[118],"knowledge,":[119],"need":[121],"develop":[123],"scalable":[124,162],"oversight":[125,163],"methods":[126],"enable":[128,160],"humans":[129],"supervise":[131],"their":[132],"outputs,":[133],"which":[134,165],"may":[135],"be":[136],"even":[138],"if":[139],"supervisors":[141],"themselves":[143],"knowledgeable.":[146],"difficulty":[148],"GPQA":[150],"both":[151],"non-experts":[154],"frontier":[156],"should":[159],"realistic":[161],"experiments,":[164],"hope":[167],"can":[168],"devise":[170],"ways":[171],"human":[173,185],"reliably":[176],"get":[177],"truthful":[178],"information":[179],"from":[180],"surpass":[184],"capabilities.":[186]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":5}],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
