{"id":"https://openalex.org/W7138868823","doi":"https://doi.org/10.1609/aaai.v40i40.40655","title":"OmniBench: A Comprehensive Benchmark Integrating Real-World, Time-sensitive, and Multi-Hop Questions with a Multi-Dimensional Hybrid Evaluation Framework","display_name":"OmniBench: A Comprehensive Benchmark Integrating Real-World, Time-sensitive, and Multi-Hop Questions with a Multi-Dimensional Hybrid Evaluation Framework","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138868823","doi":"https://doi.org/10.1609/aaai.v40i40.40655"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i40.40655","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i40.40655","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40655/44616","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40655/44616","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130051939","display_name":"Wenjie Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wenjie Wang","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129795353","display_name":"Yufeng Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yufeng Jiang","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130028817","display_name":"Ge Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ge Sun","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112870714","display_name":"Chenghang Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chenghang Dong","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129985695","display_name":"Zheng Jun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng Jun","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100540823","display_name":"Li Mengjie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li Mengjie","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090561833","display_name":"Lixin Chen","orcid":"https://orcid.org/0000-0002-9910-6483"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lixin Chen","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130152115","display_name":"Huan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huan Wang","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129894904","display_name":"Haoyu Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haoyu Wang","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5130192948","display_name":"Bin Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bin Chen","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5130051939"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.75856531,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"40","first_page":"33657","last_page":"33665"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4666000008583069,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4666000008583069,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.24060000479221344,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.046300001442432404,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8514000177383423},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.669700026512146},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.6330000162124634},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.33320000767707825},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.2867000102996826}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8514000177383423},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7268999814987183},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.669700026512146},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.6330000162124634},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.43880000710487366},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.374099999666214},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.33320000767707825},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30979999899864197},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.29760000109672546},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.29120001196861267},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2867000102996826},{"id":"https://openalex.org/C50897621","wikidata":"https://www.wikidata.org/wiki/Q2665508","display_name":"Hybrid system","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C3018395757","wikidata":"https://www.wikidata.org/wiki/Q1379672","display_name":"Evaluation methods","level":2,"score":0.259799987077713}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i40.40655","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i40.40655","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40655/44616","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i40.40655","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i40.40655","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40655/44616","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138868823.pdf","grobid_xml":"https://content.openalex.org/works/W7138868823.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recently,":[0],"with":[1,145,201],"the":[2,56,112,125,131,140,152,174,205,208],"increasing":[3],"capabilities":[4],"of":[5,61,82,111,130,155,188,207],"Large":[6],"Language":[7],"Models":[8],"(LLMs),":[9],"AI":[10],"applications":[11],"have":[12],"gradually":[13],"emerged":[14],"to":[15,55,184],"solve":[16],"various":[17,91],"problems":[18],"in":[19,159],"people's":[20],"daily":[21],"lives,":[22],"so":[23],"accurately":[24],"measuring":[25],"their":[26],"performance":[27],"and":[28,58,93,101,127,181],"reliability":[29,206],"is":[30,79,170],"paramount.":[31],"However,":[32],"existing":[33],"benchmarks":[34],"predominantly":[35],"rely":[36],"on":[37,90],"closed-ended,":[38],"multiple-choice":[39],"or":[40],"short-answer":[41],"question":[42],"formats.":[43],"While":[44],"useful":[45],"for":[46,172],"assessment,":[47],"these":[48],"formats":[49],"exhibit":[50],"a":[51,74,108,163],"significant":[52],"gap":[53],"compared":[54],"diverse":[57,179],"open-ended":[59],"nature":[60],"questions":[62,85,141],"posed":[63],"by":[64],"real-world":[65,88],"users.":[66],"To":[67],"bridge":[68],"this":[69,195],"gap,":[70],"we":[71,116],"produce":[72],"OmniBench,":[73],"comprehensive":[75],"open-domain":[76],"benchmark.":[77],"OmniBench":[78],"uniquely":[80],"composed":[81],"authentic,":[83],"user-generated":[84],"harvested":[86],"from":[87,107],"interactions":[89],"websites":[92],"applications,":[94],"covering":[95],"16":[96],"rigorously":[97],"defined":[98],"knowledge":[99],"domains":[100],"5":[102],"crucial":[103],"user":[104],"intents":[105],"derived":[106],"large-scale":[109],"analysis":[110],"mass":[113],"corpus.":[114],"Crucially,":[115],"propose":[117],"three":[118],"automated":[119],"data":[120,156],"construction":[121],"pipelines":[122],"that":[123,139,194],"enable":[124],"continuous":[126],"periodic":[128],"updating":[129],"benchmark":[132,209],"dataset.":[133],"This":[134,176],"approach":[135],"not":[136],"only":[137],"ensures":[138],"can":[142],"keep":[143],"up":[144],"current":[146],"events,":[147],"but":[148],"also":[149],"effectively":[150],"mitigates":[151],"critical":[153],"issue":[154],"contamination":[157],"prevalent":[158],"static":[160],"benchmarks.":[161],"Moreover,":[162],"multi-dimensional":[164],"hybrid":[165],"evaluation":[166,182,196],"framework":[167,177,197],"named":[168],"OmniEval":[169],"proposed":[171],"evaluating":[173],"responses.":[175],"combines":[178],"metrics":[180],"methods":[183],"capture":[185],"nuanced":[186],"aspects":[187],"answer":[189],"performance.":[190],"Extensive":[191],"validation":[192],"demonstrates":[193],"exhibits":[198],"strong":[199],"alignment":[200],"human":[202],"judgments,":[203],"ensuring":[204],"results.":[210]},"counts_by_year":[],"updated_date":"2026-03-20T20:54:20.808490","created_date":"2026-03-20T00:00:00"}
