{"id":"https://openalex.org/W7118867548","doi":"https://doi.org/10.48550/arxiv.2601.00624","title":"Do Chatbot LLMs Talk Too Much? The YapBench Benchmark","display_name":"Do Chatbot LLMs Talk Too Much? The YapBench Benchmark","publication_year":2026,"publication_date":"2026-01-02","ids":{"openalex":"https://openalex.org/W7118867548","doi":"https://doi.org/10.48550/arxiv.2601.00624"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.00624","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00624","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.00624","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079540784","display_name":"\u0412.\u0412. \u0411\u043e\u0440\u0438\u0441\u043e\u0432","orcid":"https://orcid.org/0000-0001-7357-9365"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Borisov, Vadim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122211677","display_name":"Michael Gr\u00f6ger","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gr\u00f6ger, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030534930","display_name":"Mina Mikhael","orcid":"https://orcid.org/0000-0002-6630-7553"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mikhael, Mina","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5089725861","display_name":"R. Schreiber","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schreiber, Richard H.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5079540784"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.3847000002861023,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.3847000002861023,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.20960000157356262,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.09480000287294388,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6513000130653381},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.6201000213623047},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5835999846458435},{"id":"https://openalex.org/keywords/oracle","display_name":"Oracle","score":0.4066999852657318},{"id":"https://openalex.org/keywords/snippet","display_name":"Snippet","score":0.40310001373291016},{"id":"https://openalex.org/keywords/boilerplate-text","display_name":"Boilerplate text","score":0.3953999876976013},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.39500001072883606},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.38040000200271606}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6513000130653381},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6383000016212463},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.6201000213623047},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5835999846458435},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44699999690055847},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4357999861240387},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.4066999852657318},{"id":"https://openalex.org/C2777822670","wikidata":"https://www.wikidata.org/wiki/Q1120538","display_name":"Snippet","level":2,"score":0.40310001373291016},{"id":"https://openalex.org/C75701414","wikidata":"https://www.wikidata.org/wiki/Q1651672","display_name":"Boilerplate text","level":2,"score":0.3953999876976013},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.39500001072883606},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.38040000200271606},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.36250001192092896},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.28999999165534973},{"id":"https://openalex.org/C2779041454","wikidata":"https://www.wikidata.org/wiki/Q870780","display_name":"Chatbot","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C106347477","wikidata":"https://www.wikidata.org/wiki/Q5384228","display_name":"Equating","level":3,"score":0.27090001106262207},{"id":"https://openalex.org/C61641136","wikidata":"https://www.wikidata.org/wiki/Q1107019","display_name":"Cognitive load","level":3,"score":0.27079999446868896},{"id":"https://openalex.org/C204495577","wikidata":"https://www.wikidata.org/wiki/Q1205349","display_name":"Callback","level":2,"score":0.26499998569488525},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C71901391","wikidata":"https://www.wikidata.org/wiki/Q7126699","display_name":"Upload","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.2590999901294708},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.25270000100135803},{"id":"https://openalex.org/C42629822","wikidata":"https://www.wikidata.org/wiki/Q1346408","display_name":"Geocoding","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.00624","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00624","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.00624","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00624","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"such":[4],"as":[5,12],"ChatGPT,":[6],"Claude,":[7],"and":[8,35,46,88,163,188,198,211],"Gemini":[9],"increasingly":[10],"act":[11],"general-purpose":[13],"copilots,":[14],"yet":[15],"they":[16],"often":[17],"respond":[18],"with":[19,159],"unnecessary":[20],"length":[21,52,99,187],"on":[22,73,111,195,203],"simple":[23],"requests,":[24],"adding":[25],"redundant":[26],"explanations,":[27],"hedging,":[28],"or":[29,144,172,200],"boilerplate":[30],"that":[31,43],"increases":[32],"cognitive":[33],"load":[34],"inflates":[36],"token-based":[37],"inference":[38],"cost.":[39],"Prior":[40],"work":[41],"suggests":[42],"preference-based":[44],"post-training":[45],"LLM-judged":[47],"evaluations":[48],"can":[49],"induce":[50],"systematic":[51],"bias,":[53],"where":[54,147,168],"longer":[55],"answers":[56],"are":[57],"rewarded":[58],"even":[59],"at":[60],"comparable":[61],"quality.":[62],"We":[63,115,207],"introduce":[64],"YapBench,":[65],"a":[66,80,83,89,122,152,169,213],"lightweight":[67],"benchmark":[68,210],"for":[69,216],"quantifying":[70],"user-visible":[71],"over-generation":[72],"brevity-ideal":[74,140],"prompts.":[75],"Each":[76],"item":[77],"consists":[78],"of":[79,126],"single-turn":[81],"prompt,":[82],"curated":[84],"minimal-sufficient":[85],"baseline":[86,102],"answer,":[87],"category":[90],"label.":[91],"Our":[92],"primary":[93],"metric,":[94],"YapScore,":[95],"measures":[96],"excess":[97,186],"response":[98],"beyond":[100],"the":[101,120,148,209],"in":[103,184],"characters,":[104],"enabling":[105],"comparisons":[106],"across":[107],"models":[108],"without":[109],"relying":[110],"any":[112],"specific":[113],"tokenizer.":[114],"summarize":[116],"model":[117],"performance":[118],"via":[119],"YapIndex,":[121],"uniformly":[123],"weighted":[124],"average":[125],"category-level":[127],"median":[128,185],"YapScores.":[129],"YapBench":[130],"contains":[131],"over":[132,220],"three":[133,138],"hundred":[134],"English":[135],"prompts":[136],"spanning":[137],"common":[139],"settings:":[141],"(A)":[142],"minimal":[143],"ambiguous":[145,196],"inputs":[146,197],"ideal":[149],"behavior":[150,219],"is":[151],"short":[153,160],"clarification,":[154],"(B)":[155],"closed-form":[156],"factual":[157],"questions":[158],"stable":[161],"answers,":[162],"(C)":[164],"one-line":[165,204],"coding":[166],"tasks":[167],"single":[170],"command":[171],"snippet":[173],"suffices.":[174],"Evaluating":[175],"76":[176],"assistant":[177],"LLMs,":[178],"we":[179],"observe":[180],"an":[181],"order-of-magnitude":[182],"spread":[183],"distinct":[189],"category-specific":[190],"failure":[191],"modes,":[192],"including":[193],"vacuum-filling":[194],"explanation":[199],"formatting":[201],"overhead":[202],"technical":[205],"requests.":[206],"release":[208],"maintain":[212],"live":[214],"leaderboard":[215],"tracking":[217],"verbosity":[218],"time.":[221]},"counts_by_year":[],"updated_date":"2026-01-08T20:10:11.968330","created_date":"2026-01-08T00:00:00"}
