{"id":"https://openalex.org/W7140322352","doi":"https://doi.org/10.48550/arxiv.2603.23160","title":"UniDial-EvalKit: A Unified Toolkit for Evaluating Multi-Faceted Conversational Abilities","display_name":"UniDial-EvalKit: A Unified Toolkit for Evaluating Multi-Faceted Conversational Abilities","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7140322352","doi":"https://doi.org/10.48550/arxiv.2603.23160"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.23160","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23160","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.23160","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130550020","display_name":"Qi Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jia, Qi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025780527","display_name":"Haodong Zhao","orcid":"https://orcid.org/0000-0002-4405-1649"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Haodong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130619844","display_name":"Dun Pei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pei, Dun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130572098","display_name":"Xiujie Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Xiujie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130543216","display_name":"Shibo Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Shibo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130562372","display_name":"Zijian Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zijian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130550021","display_name":"Zicheng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zicheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130617836","display_name":"Xiangyang Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Xiangyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130547827","display_name":"Guangtao Zhai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhai, Guangtao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5130550020"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.13449999690055847,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.13449999690055847,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.11810000240802765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.0877000018954277,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.826200008392334},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6704999804496765},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.66839998960495},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.5986999869346619},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.48539999127388},{"id":"https://openalex.org/keywords/debugging","display_name":"Debugging","score":0.4196999967098236},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.375}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.826200008392334},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8070999979972839},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6704999804496765},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.66839998960495},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.5986999869346619},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.48539999127388},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.4578000009059906},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.4196999967098236},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.375},{"id":"https://openalex.org/C2780366209","wikidata":"https://www.wikidata.org/wiki/Q5170200","display_name":"Core model","level":2,"score":0.3650999963283539},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.3434999883174896},{"id":"https://openalex.org/C3018395757","wikidata":"https://www.wikidata.org/wiki/Q1379672","display_name":"Evaluation methods","level":2,"score":0.3375000059604645},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3314000070095062},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33009999990463257},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.3149000108242035},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2903999984264374},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.29019999504089355},{"id":"https://openalex.org/C188087704","wikidata":"https://www.wikidata.org/wiki/Q369577","display_name":"Standardization","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27459999918937683},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.273499995470047},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.26910001039505005},{"id":"https://openalex.org/C2984074130","wikidata":"https://www.wikidata.org/wiki/Q73539779","display_name":"R package","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.23160","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23160","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.23160","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23160","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Benchmarking":[0],"AI":[1,53],"systems":[2],"in":[3,14,26,61,156],"multi-turn":[4,114],"interactive":[5,52,157],"scenarios":[6],"is":[7],"essential":[8],"for":[9,50],"understanding":[10],"their":[11],"practical":[12],"capabilities":[13],"real-world":[15],"applications.":[16],"However,":[17],"existing":[18],"evaluation":[19,32,48,76,96,132,142],"protocols":[20],"are":[21],"highly":[22],"heterogeneous,":[23],"differing":[24],"significantly":[25,130],"dataset":[27],"formats,":[28],"model":[29],"interfaces,":[30],"and":[31,82,100,125,134,141,152],"pipelines,":[33],"which":[34],"severely":[35],"impedes":[36],"systematic":[37],"comparison.":[38],"In":[39],"this":[40],"work,":[41],"we":[42],"present":[43],"UniDial-EvalKit":[44],"(UDE),":[45],"a":[46,71,79,87,148],"unified":[47],"toolkit":[49,140],"assessing":[51],"systems.":[54],"The":[55],"core":[56],"contribution":[57],"of":[58],"UDE":[59,116],"lies":[60],"its":[62],"holistic":[63],"unification:":[64],"it":[65],"standardizes":[66],"heterogeneous":[67],"data":[68],"formats":[69],"into":[70],"universal":[72],"schema,":[73],"streamlines":[74],"complex":[75],"pipelines":[77],"through":[78,97,122],"modular":[80],"architecture,":[81],"aligns":[83],"metric":[84],"calculations":[85],"under":[86],"consistent":[88],"scoring":[89],"interface.":[90],"It":[91],"also":[92,129],"supports":[93],"efficient":[94],"large-scale":[95],"parallel":[98],"generation":[99],"scoring,":[101],"as":[102,104],"well":[103],"checkpoint-based":[105],"caching":[106],"to":[107,146],"eliminate":[108],"redundant":[109],"computation.":[110],"Validated":[111],"across":[112],"diverse":[113],"benchmarks,":[115],"not":[117],"only":[118],"guarantees":[119],"high":[120],"reproducibility":[121],"standardized":[123,149],"workflows":[124],"transparent":[126],"logging,":[127],"but":[128],"improves":[131],"efficiency":[133],"extensibility.":[135],"We":[136],"make":[137],"the":[138],"complete":[139],"scripts":[143],"publicly":[144],"available":[145],"foster":[147],"benchmarking":[150],"ecosystem":[151],"accelerate":[153],"future":[154],"breakthroughs":[155],"AI.":[158]},"counts_by_year":[],"updated_date":"2026-03-26T06:10:45.909354","created_date":"2026-03-26T00:00:00"}
