{"id":"https://openalex.org/W4408186113","doi":"https://doi.org/10.1145/3722449.3722467","title":"Evaluating the Evaluations: A Perspective on Benchmarks","display_name":"Evaluating the Evaluations: A Perspective on Benchmarks","publication_year":2024,"publication_date":"2024-12-01","ids":{"openalex":"https://openalex.org/W4408186113","doi":"https://doi.org/10.1145/3722449.3722467"},"language":"en","primary_location":{"id":"doi:10.1145/3722449.3722467","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3722449.3722467","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3722449.3722467?download=true","source":{"id":"https://openalex.org/S6756005","display_name":"ACM SIGIR Forum","issn_l":"0163-5840","issn":["0163-5840","1558-0229"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGIR Forum","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3722449.3722467?download=true","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043136878","display_name":"Omar Alonso","orcid":"https://orcid.org/0009-0009-2515-4771"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Omar Alonso","raw_affiliation_strings":["Amazon, Palo Alto, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016543371","display_name":"Kenneth Church","orcid":"https://orcid.org/0000-0001-8378-6069"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kenneth Church","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5043136878"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.24389945,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"58","issue":"2","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9617999792098999,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12010","display_name":"Evaluation and Performance Assessment","score":0.9168999791145325,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.739089846611023},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.41675955057144165},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15712544322013855}],"concepts":[{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.739089846611023},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.41675955057144165},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15712544322013855}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3722449.3722467","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3722449.3722467","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3722449.3722467?download=true","source":{"id":"https://openalex.org/S6756005","display_name":"ACM SIGIR Forum","issn_l":"0163-5840","issn":["0163-5840","1558-0229"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGIR Forum","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3722449.3722467","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3722449.3722467","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3722449.3722467?download=true","source":{"id":"https://openalex.org/S6756005","display_name":"ACM SIGIR Forum","issn_l":"0163-5840","issn":["0163-5840","1558-0229"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGIR Forum","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4408186113.pdf"},"referenced_works_count":103,"referenced_works":["https://openalex.org/W96378112","https://openalex.org/W119481552","https://openalex.org/W210770835","https://openalex.org/W1483126227","https://openalex.org/W1486212465","https://openalex.org/W1494198834","https://openalex.org/W1523019309","https://openalex.org/W1532008752","https://openalex.org/W1555915743","https://openalex.org/W1907286193","https://openalex.org/W2007339694","https://openalex.org/W2019976352","https://openalex.org/W2037140704","https://openalex.org/W2051167396","https://openalex.org/W2060345601","https://openalex.org/W2062270497","https://openalex.org/W2062874156","https://openalex.org/W2070246124","https://openalex.org/W2075173586","https://openalex.org/W2083305840","https://openalex.org/W2086253379","https://openalex.org/W2088209891","https://openalex.org/W2092654472","https://openalex.org/W2093432797","https://openalex.org/W2113459411","https://openalex.org/W2115186633","https://openalex.org/W2117539524","https://openalex.org/W2150102617","https://openalex.org/W2154774499","https://openalex.org/W2156037541","https://openalex.org/W2166637769","https://openalex.org/W2167342631","https://openalex.org/W2194775991","https://openalex.org/W2257437519","https://openalex.org/W2277195237","https://openalex.org/W2293201672","https://openalex.org/W2295990020","https://openalex.org/W2343954916","https://openalex.org/W2725179571","https://openalex.org/W2741905638","https://openalex.org/W2766474394","https://openalex.org/W2798425628","https://openalex.org/W2798702047","https://openalex.org/W2896457183","https://openalex.org/W2912924812","https://openalex.org/W2923014074","https://openalex.org/W2943552823","https://openalex.org/W2960010704","https://openalex.org/W2963323070","https://openalex.org/W2963748441","https://openalex.org/W2963957489","https://openalex.org/W2979417040","https://openalex.org/W2981320891","https://openalex.org/W2995929068","https://openalex.org/W3030163527","https://openalex.org/W3035390927","https://openalex.org/W3092862347","https://openalex.org/W3112129603","https://openalex.org/W3127686677","https://openalex.org/W3169369929","https://openalex.org/W3177765786","https://openalex.org/W4206765718","https://openalex.org/W4210764005","https://openalex.org/W4210913687","https://openalex.org/W4221159672","https://openalex.org/W4226082499","https://openalex.org/W4231856373","https://openalex.org/W4232980324","https://openalex.org/W4234917632","https://openalex.org/W4240407914","https://openalex.org/W4240913316","https://openalex.org/W4241122026","https://openalex.org/W4243333943","https://openalex.org/W4249672879","https://openalex.org/W4251560691","https://openalex.org/W4255492838","https://openalex.org/W4255556797","https://openalex.org/W4287553199","https://openalex.org/W4299425675","https://openalex.org/W4300046558","https://openalex.org/W4300896918","https://openalex.org/W4366460536","https://openalex.org/W4385889927","https://openalex.org/W4386875581","https://openalex.org/W4386908049","https://openalex.org/W4389579297","https://openalex.org/W4392384494","https://openalex.org/W4404368089","https://openalex.org/W4405329769","https://openalex.org/W6600477187","https://openalex.org/W6628905179","https://openalex.org/W6631459773","https://openalex.org/W6650218246","https://openalex.org/W6651560827","https://openalex.org/W6660191876","https://openalex.org/W6668865457","https://openalex.org/W6684900693","https://openalex.org/W6703967046","https://openalex.org/W6735637513","https://openalex.org/W6775204245","https://openalex.org/W6810081322","https://openalex.org/W6819575453","https://openalex.org/W7043762925"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2018871932","https://openalex.org/W2001405890"],"abstract_inverted_index":{"More":[0],"and":[1,5,22,26,34,39,50,64,86,100],"more":[2],"benchmarks,":[3],"datasets,":[4],"evaluation":[6,112],"tasks":[7],"are":[8],"becoming":[9],"available.":[10],"This":[11],"is":[12,41,55],"extremely":[13],"useful":[14],"for":[15,110],"the":[16,31],"community":[17,75],"because":[18],"it":[19],"enables":[20],"researchers":[21],"practitioners":[23],"to":[24,48,59,88],"test":[25],"evaluate":[27],"new":[28],"techniques.":[29],"However,":[30],"construction,":[32],"evaluation,":[33],"maintenance":[35],"of":[36,83,108],"data":[37,81],"sets":[38],"benchmarks":[40],"opaque":[42],"which":[43],"creates":[44],"problems":[45],"with":[46],"respect":[47],"stability":[49],"true":[51],"representations.":[52],"Our":[53],"position":[54],"that":[56,73],"we":[57,62],"need":[58],"revisit":[60],"how":[61],"design":[63],"implement":[65],"benchmarks.":[66,113],"The":[67],"SPEC":[68],"benchmark":[69,91],"offers":[70],"interesting":[71],"perspectives":[72],"our":[74],"should":[76],"consider.":[77],"We":[78,102],"use":[79],"a":[80,106],"set":[82],"influential":[84],"papers":[85],"resources":[87],"discuss":[89],"important":[90],"aspects":[92],"such":[93],"as":[94],"realistic":[95],"workloads,":[96],"reliability,":[97],"validity,":[98],"leakage,":[99],"labeling.":[101],"conclude":[103],"by":[104],"proposing":[105],"list":[107],"principles":[109],"constructing":[111]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
