{"id":"https://openalex.org/W4320003957","doi":"https://doi.org/10.48550/arxiv.2211.09110","title":"Holistic Evaluation of Language Models","display_name":"Holistic Evaluation of Language Models","publication_year":2022,"publication_date":"2022-11-16","ids":{"openalex":"https://openalex.org/W4320003957","doi":"https://doi.org/10.48550/arxiv.2211.09110"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2211.09110","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.09110","pdf_url":"https://arxiv.org/pdf/2211.09110","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2211.09110","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025255782","display_name":"Percy Liang","orcid":"https://orcid.org/0000-0002-0458-6139"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liang, Percy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069576651","display_name":"Rishi Bommasani","orcid":"https://orcid.org/0000-0002-9616-5138"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bommasani, Rishi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070994105","display_name":"Tong Lee","orcid":"https://orcid.org/0000-0001-9817-2908"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Tony","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031570233","display_name":"Dimitris Tsipras","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tsipras, Dimitris","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088639037","display_name":"Dilara Soylu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Soylu, Dilara","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005898634","display_name":"Michihiro Yasunaga","orcid":"https://orcid.org/0009-0003-3008-927X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yasunaga, Michihiro","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001146558","display_name":"Yian Zhang","orcid":"https://orcid.org/0000-0002-4033-211X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079514101","display_name":"Deepak Narayanan","orcid":"https://orcid.org/0000-0002-3020-2848"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Narayanan, Deepak","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024901763","display_name":"Yuhuai Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Yuhuai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014379995","display_name":"Ananya Kumar","orcid":"https://orcid.org/0000-0002-9002-510X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar, Ananya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035267335","display_name":"Benjamin T. Newman","orcid":"https://orcid.org/0000-0002-0668-2853"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Newman, Benjamin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002684888","display_name":"Binhang Yuan","orcid":"https://orcid.org/0000-0002-3188-2769"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Binhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060511665","display_name":"Bobby Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Bobby","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004750246","display_name":"Ce Zhang","orcid":"https://orcid.org/0000-0001-5100-3584"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ce","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072688499","display_name":"Christian Cosgrove","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cosgrove, Christian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046006076","display_name":"Christopher D. Manning","orcid":"https://orcid.org/0000-0001-6155-649X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Manning, Christopher D.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103852640","display_name":"Christopher R\u00e9","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"R\u00e9, Christopher","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040568599","display_name":"Diana Acosta-Navas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Acosta-Navas, Diana","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027693232","display_name":"Drew A. Hudson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hudson, Drew A.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062826850","display_name":"Eric Zelikman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zelikman, Eric","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001994692","display_name":"Esin Durmus","orcid":"https://orcid.org/0009-0009-7331-8160"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Durmus, Esin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074277807","display_name":"Faisal Ladhak","orcid":"https://orcid.org/0009-0009-5730-3866"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ladhak, Faisal","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050798856","display_name":"Frieda Rong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rong, Frieda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101912684","display_name":"Hong\u2010Yu Ren","orcid":"https://orcid.org/0000-0003-4733-7607"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ren, Hongyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051534896","display_name":"Huaxiu Yao","orcid":"https://orcid.org/0000-0002-8691-9629"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Huaxiu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100440609","display_name":"Jue Wang","orcid":"https://orcid.org/0000-0002-6712-1929"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026521456","display_name":"Keshav Santhanam","orcid":"https://orcid.org/0000-0001-5939-7944"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Santhanam, Keshav","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054285881","display_name":"Laurel Orr","orcid":"https://orcid.org/0000-0002-2183-3541"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Orr, Laurel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052391231","display_name":"Lucia Zheng","orcid":"https://orcid.org/0000-0002-8602-0007"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Lucia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061433075","display_name":"Mert Y\u00fcksekg\u00f6n\u00fcl","orcid":"https://orcid.org/0000-0002-9761-8178"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuksekgonul, Mert","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009980324","display_name":"Mirac S\u00fczg\u00fcn","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Suzgun, Mirac","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006527256","display_name":"Nathan Kim","orcid":"https://orcid.org/0000-0002-0503-0263"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Nathan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068178240","display_name":"Neel Guha","orcid":"https://orcid.org/0009-0003-5120-1726"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guha, Neel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033738047","display_name":"Niladri S. Chatterji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chatterji, Niladri","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015201177","display_name":"Omar Khattab","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khattab, Omar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049073875","display_name":"Peter Henderson","orcid":"https://orcid.org/0000-0003-3938-0541"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Henderson, Peter","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101856573","display_name":"Qian Huang","orcid":"https://orcid.org/0000-0001-9658-4714"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Qian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013985536","display_name":"Ryan Chi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chi, Ryan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069097726","display_name":"Sang Michael Xie","orcid":"https://orcid.org/0000-0002-0820-2753"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Sang Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068204319","display_name":"Shibani Santurkar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Santurkar, Shibani","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056551357","display_name":"Surya Ganguli","orcid":"https://orcid.org/0000-0002-9264-7551"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ganguli, Surya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015518638","display_name":"Tatsunori Hashimoto","orcid":"https://orcid.org/0000-0003-0521-5855"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hashimoto, Tatsunori","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079244248","display_name":"Thomas Icard","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Icard, Thomas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100437467","display_name":"Tianyi Zhang","orcid":"https://orcid.org/0009-0002-8868-7064"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Tianyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053028004","display_name":"Vishrav Chaudhary","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chaudhary, Vishrav","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100702488","display_name":"William Yang Wang","orcid":"https://orcid.org/0000-0002-0878-1257"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, William","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100752868","display_name":"Xuechen Li","orcid":"https://orcid.org/0000-0002-9349-8375"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xuechen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006623234","display_name":"Yifan Mai","orcid":"https://orcid.org/0000-0002-4807-037X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mai, Yifan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100624916","display_name":"Yuhui Zhang","orcid":"https://orcid.org/0000-0002-5769-3456"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yuhui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5054117253","display_name":"Yuta Koreeda","orcid":"https://orcid.org/0009-0007-2262-3072"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Koreeda, Yuta","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":50,"corresponding_author_ids":["https://openalex.org/A5025255782"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":119,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9721999764442444,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9721999764442444,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9180999994277954,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7807142734527588},{"id":"https://openalex.org/keywords/transparency","display_name":"Transparency (behavior)","score":0.6821016073226929},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5994040369987488},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5753074884414673},{"id":"https://openalex.org/keywords/mainstream","display_name":"Mainstream","score":0.5279777646064758},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5143881440162659},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4305817484855652},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39883214235305786},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34185367822647095},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.18068617582321167}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7807142734527588},{"id":"https://openalex.org/C2780233690","wikidata":"https://www.wikidata.org/wiki/Q535347","display_name":"Transparency (behavior)","level":2,"score":0.6821016073226929},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5994040369987488},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5753074884414673},{"id":"https://openalex.org/C2777617010","wikidata":"https://www.wikidata.org/wiki/Q18957","display_name":"Mainstream","level":2,"score":0.5279777646064758},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5143881440162659},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4305817484855652},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39883214235305786},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34185367822647095},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.18068617582321167},{"id":"https://openalex.org/C27206212","wikidata":"https://www.wikidata.org/wiki/Q34178","display_name":"Theology","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2211.09110","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.09110","pdf_url":"https://arxiv.org/pdf/2211.09110","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-156660","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-156660","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2211.09110","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2211.09110","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2211.09110","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.09110","pdf_url":"https://arxiv.org/pdf/2211.09110","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.44999998807907104,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G5921281487","display_name":null,"funder_award_id":"number","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6894402473","display_name":null,"funder_award_id":"Fellowship","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309434","display_name":"University of Wisconsin-Madison","ror":"https://ror.org/01y2jtd41"},{"id":"https://openalex.org/F4320321652","display_name":"Eidgen\u00f6ssische Technische Hochschule Z\u00fcrich","ror":"https://ror.org/05a28rw58"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4320003957.pdf","grobid_xml":"https://content.openalex.org/works/W4320003957.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1583826057","https://openalex.org/W2377237701","https://openalex.org/W2360099860","https://openalex.org/W4323893170","https://openalex.org/W2352463596","https://openalex.org/W2380850119","https://openalex.org/W2101450440","https://openalex.org/W2383675217","https://openalex.org/W2376151201","https://openalex.org/W2393898889"],"abstract_inverted_index":{"Language":[0,28],"models":[1,160,185,201,217],"(LMs)":[2],"are":[3,19,55,129],"becoming":[4],"the":[5,33,41,114,124,194,223,270],"foundation":[6],"for":[7,58,79,84,104,250,262,269],"almost":[8],"all":[9,168,215,243],"major":[10],"language":[11,36,159],"technologies,":[12],"but":[13],"their":[14],"capabilities,":[15],"limitations,":[16],"and":[17,50,69,102,126,164,227,247,278],"risks":[18],"not":[20,175,202],"well":[21,254],"understood.":[22],"We":[23,92,132,209,260],"present":[24],"Holistic":[25],"Evaluation":[26],"of":[27,35,44,56,106,113,156,172,193],"Models":[29],"(HELM)":[30],"to":[31,123,143,183,212,264],"improve":[32,210],"transparency":[34],"models.":[37,279],"First,":[38],"we":[39,61,87,151,241],"taxonomize":[40],"vast":[42],"space":[43],"potential":[45],"scenarios":[46,109,226],"(i.e.":[47,52],"use":[48],"cases)":[49],"metrics":[51,83,95,118,228],"desiderata)":[53],"that":[54,127],"interest":[57],"LMs.":[59],"Then":[60],"select":[62],"a":[63,89,153,204,256,266],"broad":[64],"subset":[65],"based":[66,138],"on":[67,139,167,186,190,222],"coverage":[68],"feasibility,":[70],"noting":[71],"what's":[72],"missing":[73],"or":[74],"underrepresented":[75],"(e.g.":[76,147],"question":[77],"answering":[78],"neglected":[80],"English":[81],"dialects,":[82],"trustworthiness).":[85],"Second,":[86],"adopt":[88],"multi-metric":[90],"approach:":[91],"measure":[93],"7":[94,135],"(accuracy,":[96],"calibration,":[97],"robustness,":[98],"fairness,":[99],"bias,":[100],"toxicity,":[101],"efficiency)":[103],"each":[105],"16":[107],"core":[108,195,225],"when":[110],"possible":[111],"(87.5%":[112],"time).":[115],"This":[116],"ensures":[117],"beyond":[119],"accuracy":[120],"don't":[121],"fall":[122],"wayside,":[125],"trade-offs":[128],"clearly":[130],"exposed.":[131],"also":[133],"perform":[134],"targeted":[136,141],"evaluations,":[137],"26":[140],"scenarios,":[142,170,197,276],"analyze":[144],"specific":[145],"aspects":[146],"reasoning,":[148],"disinformation).":[149],"Third,":[150],"conduct":[152],"large-scale":[154],"evaluation":[155,233],"30":[157,216],"prominent":[158,200],"(spanning":[161],"open,":[162],"limited-access,":[163],"closed":[165],"models)":[166],"42":[169],"21":[171],"which":[173],"were":[174,188],"previously":[176],"used":[177],"in":[178,207],"mainstream":[179],"LM":[180],"evaluation.":[181],"Prior":[182],"HELM,":[184],"average":[187],"evaluated":[189],"just":[191],"17.9%":[192],"HELM":[196,263],"with":[198,274],"some":[199],"sharing":[203],"single":[205],"scenario":[206],"common.":[208],"this":[211],"96.0%:":[213],"now":[214],"have":[218],"been":[219],"densely":[220],"benchmarked":[221],"same":[224],"under":[229],"standardized":[230],"conditions.":[231],"Our":[232],"surfaces":[234],"25":[235],"top-level":[236],"findings.":[237],"For":[238],"full":[239],"transparency,":[240],"release":[242],"raw":[244],"model":[245],"prompts":[246],"completions":[248],"publicly":[249],"further":[251],"analysis,":[252],"as":[253,255],"general":[257],"modular":[258],"toolkit.":[259],"intend":[261],"be":[265],"living":[267],"benchmark":[268],"community,":[271],"continuously":[272],"updated":[273],"new":[275],"metrics,":[277]},"counts_by_year":[{"year":2026,"cited_by_count":14},{"year":2025,"cited_by_count":36},{"year":2024,"cited_by_count":27},{"year":2023,"cited_by_count":41}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
