{"id":"https://openalex.org/W7161030667","doi":"https://doi.org/10.48550/arxiv.2605.11128","title":"Sampling More, Getting Less: Calibration is the Diversity Bottleneck in LLMs","display_name":"Sampling More, Getting Less: Calibration is the Diversity Bottleneck in LLMs","publication_year":2026,"publication_date":"2026-05-11","ids":{"openalex":"https://openalex.org/W7161030667","doi":"https://doi.org/10.48550/arxiv.2605.11128"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.11128","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11128","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.11128","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116235417","display_name":"Amin Banayeeanzade","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Banayeeanzade, Amin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126936513","display_name":"Qingchuan Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Qingchuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120555299","display_name":"Dhruv Tarsadiya","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tarsadiya, Dhruv","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136072103","display_name":"Fatemeh Bahrani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bahrani, Fatemeh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133898760","display_name":"Leonardo Blas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Blas, Leonardo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066674792","display_name":"Alfy Samuel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Samuel, Alfy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136059325","display_name":"Robin Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia, Robin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041237539","display_name":"Meisam Razaviyayn","orcid":"https://orcid.org/0000-0003-4342-6661"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Razaviyayn, Meisam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136058524","display_name":"Sai Praneeth Karimireddy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karimireddy, Sai Praneeth","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3314000070095062,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3314000070095062,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.26350000500679016,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.1177000030875206,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.8428999781608582},{"id":"https://openalex.org/keywords/diversity","display_name":"Diversity (politics)","score":0.6772000193595886},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5737000107765198},{"id":"https://openalex.org/keywords/oracle","display_name":"Oracle","score":0.5478000044822693},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5443999767303467},{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.5044000148773193},{"id":"https://openalex.org/keywords/cutoff","display_name":"Cutoff","score":0.4255000054836273},{"id":"https://openalex.org/keywords/probability-mass-function","display_name":"Probability mass function","score":0.41029998660087585},{"id":"https://openalex.org/keywords/order","display_name":"Order (exchange)","score":0.40450000762939453}],"concepts":[{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.8428999781608582},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.6772000193595886},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5758000016212463},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5737000107765198},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.5478000044822693},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5443999767303467},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.5044000148773193},{"id":"https://openalex.org/C2778217198","wikidata":"https://www.wikidata.org/wiki/Q556977","display_name":"Cutoff","level":2,"score":0.4255000054836273},{"id":"https://openalex.org/C197096303","wikidata":"https://www.wikidata.org/wiki/Q869887","display_name":"Probability mass function","level":3,"score":0.41029998660087585},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.40450000762939453},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.3804999887943268},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.3393999934196472},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3370000123977661},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.3285999894142151},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32850000262260437},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.32089999318122864},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.3138999938964844},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.31349998712539673},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3122999966144562},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.29330000281333923},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29280000925064087},{"id":"https://openalex.org/C75917345","wikidata":"https://www.wikidata.org/wiki/Q2725298","display_name":"Sampling bias","level":3,"score":0.28769999742507935},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.28060001134872437},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2720000147819519},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2694000005722046},{"id":"https://openalex.org/C75373757","wikidata":"https://www.wikidata.org/wiki/Q7410160","display_name":"Sampling design","level":3,"score":0.26910001039505005},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25519999861717224},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.11128","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11128","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.11128","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11128","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.648350715637207,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Diversity":[0],"is":[1,38,118,195],"essential":[2],"for":[3,31,166],"language-model":[4],"applications":[5],"ranging":[6],"from":[7],"creative":[8],"generation":[9],"to":[10,61],"scientific":[11],"discovery,":[12],"yet":[13],"modern":[14],"LLMs":[15],"often":[16],"collapse":[17,60,194],"into":[18,80],"a":[19,54,128,198,205],"narrow":[20],"subset":[21],"of":[22,35,84,130,200,207],"plausible":[23],"outputs.":[24],"While":[25],"prior":[26],"work":[27],"has":[28],"developed":[29],"benchmarks":[30],"measuring":[32],"this":[33],"lack":[34],"diversity,":[36],"less":[37],"known":[39,174],"about":[40],"how":[41,62],"the":[42,50,78,213],"step-by-step":[43],"probability":[44,66,116],"distributions":[45],"at":[46],"inference":[47],"time":[48],"cause":[49],"problem.":[51],"We":[52,142],"introduce":[53],"validity--diversity":[55],"framework":[56,76],"that":[57,148,192],"attributes":[58],"diversity":[59,193],"an":[63],"LLM":[64,214],"allocates":[65],"mass":[67,117],"across":[68,152],"valid":[69,89,107,124,132,175],"and":[70,109,133,146,177,188,209],"invalid":[71,96,111,134],"continuations":[72,108,125],"during":[73],"decoding.":[74],"This":[75],"decomposes":[77],"bottleneck":[79],"two":[81],"complementary":[82],"forms":[83],"miscalibration.":[85],"First,":[86],"order":[87,208],"calibration:":[88,115],"tokens":[90],"are":[91],"not":[92,196],"reliably":[93],"ranked":[94],"above":[95],"tokens,":[97,135],"so":[98,136],"rank-based":[99],"cutoff":[100,179],"rules":[101],"must":[102],"trade":[103],"off":[104],"between":[105],"recovering":[106],"admitting":[110],"ones.":[112],"Second,":[113],"shape":[114,210],"overly":[119],"concentrated":[120],"only":[121],"on":[122],"few":[123],"while":[126],"having":[127],"heavy-tail":[129],"mixed":[131],"maintaining":[137],"high":[138],"validity":[139],"limits":[140],"diversity.":[141,160],"formalize":[143],"both":[144],"mechanisms":[145],"show":[147],"local":[149],"failures":[150],"compound":[151],"decoding":[153],"steps,":[154],"producing":[155],"strong":[156],"sequence-level":[157],"losses":[158],"in":[159,212],"Empirically,":[161],"we":[162,190],"develop":[163],"controlled":[164],"diagnostics":[165],"probing":[167],"these":[168],"bottlenecks,":[169],"including":[170],"tasks":[171],"with":[172],"exactly":[173],"sets":[176],"oracle":[178],"baselines.":[180],"Across":[181],"14":[182],"language":[183],"models":[184],"spanning":[185],"multiple":[186],"families":[187],"scales,":[189],"find":[191],"merely":[197],"limitation":[199],"particular":[201],"sampling":[202],"heuristics,":[203],"but":[204],"consequence":[206],"miscalibration":[211],"distribution.":[215]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-14T00:00:00"}
