{"id":"https://openalex.org/W7118321653","doi":"https://doi.org/10.48550/arxiv.2601.00942","title":"Reliability Under Randomness: An Empirical Analysis of Sparse and Dense Language Models Across Decoding Temperatures","display_name":"Reliability Under Randomness: An Empirical Analysis of Sparse and Dense Language Models Across Decoding Temperatures","publication_year":2026,"publication_date":"2026-01-02","ids":{"openalex":"https://openalex.org/W7118321653","doi":"https://doi.org/10.48550/arxiv.2601.00942"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.00942","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00942","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.00942","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085992717","display_name":"Kabir Grover","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Grover, Kabir","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5085992717"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.22110000252723694,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.22110000252723694,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.1639000028371811,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.061799999326467514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.760200023651123},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5232999920845032},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5049999952316284},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.4961000084877014},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.421999990940094},{"id":"https://openalex.org/keywords/randomness","display_name":"Randomness","score":0.4196999967098236},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.3977000117301941},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.3783000111579895}],"concepts":[{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.760200023651123},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6585999727249146},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5232999920845032},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5049999952316284},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4961000084877014},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4366999864578247},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.421999990940094},{"id":"https://openalex.org/C125112378","wikidata":"https://www.wikidata.org/wiki/Q176640","display_name":"Randomness","level":2,"score":0.4196999967098236},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3977000117301941},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.3783000111579895},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.34860000014305115},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.3416999876499176},{"id":"https://openalex.org/C193969084","wikidata":"https://www.wikidata.org/wiki/Q7452500","display_name":"Sequential decoding","level":4,"score":0.33809998631477356},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.2709999978542328},{"id":"https://openalex.org/C51823790","wikidata":"https://www.wikidata.org/wiki/Q504353","display_name":"Greedy algorithm","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C2777317252","wikidata":"https://www.wikidata.org/wiki/Q18393516","display_name":"Rare events","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C124066611","wikidata":"https://www.wikidata.org/wiki/Q28684319","display_name":"Sparse approximation","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.00942","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00942","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.00942","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00942","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"increasing":[1],"prevalence":[2],"of":[3,165,177],"sparse":[4,37,125,142,182,192],"Mixture-of-Experts":[5],"(MoE)":[6],"architectures":[7,193],"in":[8,27,55,185,190],"large":[9],"language":[10,183],"models":[11,57,184],"raises":[12],"important":[13],"questions":[14],"regarding":[15],"their":[16],"reliability":[17,64],"under":[18],"stochastic":[19],"decoding.":[20],"While":[21],"conditional":[22,53],"computation":[23,54],"enables":[24],"substantial":[25],"gains":[26],"computational":[28],"efficiency,":[29],"it":[30],"remains":[31],"unclear":[32],"whether":[33,52],"the":[34,124,132,141,162,175],"interaction":[35],"between":[36],"routing":[38],"and":[39,79,114],"temperature-based":[40],"sampling":[41],"compromises":[42],"output":[43,109,200],"stability":[44,129],"relative":[45],"to":[46,62,101,131,167],"dense":[47,133],"architectures.":[48],"This":[49],"work":[50],"investigates":[51],"MoE":[56],"amplifies":[58],"decoding-induced":[59],"randomness,":[60],"leading":[61],"reduced":[63],"as":[65,148],"temperature":[66,149],"increases.":[67,150],"We":[68,173],"evaluate":[69],"three":[70],"representative":[71],"models:":[72],"OLMoE-7B":[73],"(sparse":[74,77],"base),":[75],"Mixtral-8x7B":[76],"instruction-tuned),":[78],"Qwen2.5-3B":[80],"(dense":[81],"instruction-tuned)":[82],"on":[83,170],"deterministic":[84,171],"arithmetic":[85],"reasoning":[86],"tasks":[87],"with":[88],"objectively":[89],"verifiable":[90],"answers.":[91],"Experiments":[92],"span":[93],"four":[94],"decoding":[95,100,138,168],"configurations,":[96],"ranging":[97],"from":[98],"greedy":[99],"T=1.0.":[102],"Our":[103],"evaluation":[104],"encompasses":[105],"accuracy,":[106],"format":[107],"compliance,":[108],"consistency":[110],"across":[111,136],"repeated":[112],"generations,":[113],"confidence":[115],"metrics,":[116],"totaling":[117],"9,360":[118],"model":[119,127,135,144],"generations.":[120],"Results":[121],"demonstrate":[122],"that":[123,154],"instruction-tuned":[126,134],"exhibits":[128],"comparable":[130],"all":[137],"temperatures,":[139],"while":[140],"base":[143],"shows":[145],"systematic":[146],"degradation":[147],"These":[151],"findings":[152],"indicate":[153],"instruction":[155],"tuning,":[156],"rather":[157],"than":[158],"architectural":[159],"sparsity,":[160],"is":[161],"primary":[163],"determinant":[164],"robustness":[166],"randomness":[169],"tasks.":[172],"discuss":[174],"implications":[176],"these":[178],"results":[179],"for":[180],"deploying":[181],"reliability-critical":[186],"applications,":[187],"highlighting":[188],"scenarios":[189],"which":[191],"can":[194],"be":[195],"safely":[196],"adopted":[197],"without":[198],"sacrificing":[199],"stability.":[201]},"counts_by_year":[],"updated_date":"2026-01-08T20:10:11.968330","created_date":"2026-01-08T00:00:00"}
