{"id":"https://openalex.org/W7133299406","doi":"https://doi.org/10.48550/arxiv.2603.00045","title":"Breaking the Factorization Barrier in Diffusion Language Models","display_name":"Breaking the Factorization Barrier in Diffusion Language Models","publication_year":2026,"publication_date":"2026-02-09","ids":{"openalex":"https://openalex.org/W7133299406","doi":"https://doi.org/10.48550/arxiv.2603.00045"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.00045","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00045","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.00045","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120421413","display_name":"Ian Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Ian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113264652","display_name":"Zilei Shao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shao, Zilei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103119953","display_name":"Benjie Wang","orcid":"https://orcid.org/0000-0002-8477-1886"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Benjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057778679","display_name":"Rose Yu","orcid":"https://orcid.org/0000-0002-8491-7937"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Rose","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127876489","display_name":"Guy Van den Broeck","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Broeck, Guy Van den","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5070686068","display_name":"Anji Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Anji","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5120421413"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4415999948978424,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4415999948978424,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.06499999761581421,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.05400000140070915,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5898000001907349},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5669000148773193},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5353999733924866},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.5177000164985657},{"id":"https://openalex.org/keywords/joint-probability-distribution","display_name":"Joint probability distribution","score":0.4708999991416931},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4528999924659729},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.439300000667572},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.36719998717308044},{"id":"https://openalex.org/keywords/probability-distribution","display_name":"Probability distribution","score":0.3167000114917755}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7203999757766724},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5898000001907349},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5669000148773193},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5353999733924866},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.5177000164985657},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4756999909877777},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4740999937057495},{"id":"https://openalex.org/C18653775","wikidata":"https://www.wikidata.org/wiki/Q1333358","display_name":"Joint probability distribution","level":2,"score":0.4708999991416931},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4528999924659729},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.439300000667572},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.36719998717308044},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.3167000114917755},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.3109999895095825},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C2780186347","wikidata":"https://www.wikidata.org/wiki/Q11414","display_name":"Subnetwork","level":2,"score":0.3028999865055084},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.30219998955726624},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.29280000925064087},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2915000021457672},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C32230216","wikidata":"https://www.wikidata.org/wiki/Q7882499","display_name":"Uncertainty quantification","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.26649999618530273},{"id":"https://openalex.org/C131584629","wikidata":"https://www.wikidata.org/wiki/Q4308705","display_name":"Coupling (piping)","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2556999921798706},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.2531000077724457},{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.00045","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00045","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.00045","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00045","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Diffusion":[0,91],"language":[1,156],"models":[2,30,62],"theoretically":[3],"allow":[4],"for":[5],"efficient":[6],"parallel":[7],"generation":[8],"but":[9,57],"are":[10,23,63],"practically":[11],"hindered":[12],"by":[13,35,100],"the":[14,17,77,102,129,141,163,176],"\"factorization":[15],"barrier\":":[16],"assumption":[18],"that":[19,48,96,119],"simultaneously":[20],"predicted":[21],"tokens":[22],"independent.":[24],"This":[25,113],"limitation":[26],"forces":[27],"a":[28,59,72,81,93,107,116,173],"trade-off:":[29],"must":[31],"either":[32],"sacrifice":[33],"speed":[34],"resolving":[36],"dependencies":[37],"sequentially":[38],"or":[39],"suffer":[40],"from":[41,53,58],"incoherence":[42],"due":[43],"to":[44,65,79,139],"factorization.":[45],"We":[46,87],"argue":[47],"this":[49,98],"barrier":[50,99],"arises":[51],"not":[52],"limited":[54],"backbone":[55],"expressivity,":[56],"structural":[60],"misspecification:":[61],"restricted":[64],"fully":[66],"factorized":[67,126],"outputs":[68,189],"because":[69],"explicitly":[70],"parameterizing":[71],"joint":[73,133,148],"distribution":[74,105,117],"would":[75],"require":[76],"Transformer":[78],"output":[80,104],"prohibitively":[82],"large":[83],"number":[84],"of":[85,131,166,175],"parameters.":[86],"propose":[88],"Coupled":[89],"Discrete":[90],"(CoDD),":[92],"hybrid":[94],"framework":[95],"breaks":[97],"replacing":[101],"fully-factorized":[103],"with":[106,146,159],"lightweight,":[108],"tractable":[109],"probabilistic":[110],"inference":[111],"layer.":[112],"formulation":[114],"yields":[115],"family":[118],"is":[120],"significantly":[121,191],"more":[122],"expressive":[123],"than":[124],"standard":[125],"priors,":[127],"enabling":[128,187],"modeling":[130],"complex":[132],"dependencies,":[134],"yet":[135],"remains":[136],"compact":[137],"enough":[138],"avoid":[140],"prohibitive":[142],"parameter":[143],"explosion":[144],"associated":[145],"full":[147],"modeling.":[149],"Empirically,":[150],"CoDD":[151],"seamlessly":[152],"enhances":[153],"diverse":[154],"diffusion":[155],"model":[157],"architectures":[158],"negligible":[160],"overhead,":[161],"matching":[162],"reasoning":[164],"performance":[165,182],"computationally":[167],"intensive":[168],"Reinforcement":[169],"Learning":[170],"baselines":[171],"at":[172,190],"fraction":[174],"training":[177],"cost.":[178],"Furthermore,":[179],"it":[180],"prevents":[181],"collapse":[183],"in":[184],"few-step":[185],"generation,":[186],"high-quality":[188],"reduced":[192],"latencies.":[193],"Code":[194],"available":[195],"at:":[196],"https://github.com/liuanji/CoDD":[197]},"counts_by_year":[],"updated_date":"2026-03-04T07:09:34.246503","created_date":"2026-03-04T00:00:00"}
