{"id":"https://openalex.org/W7148619165","doi":"https://doi.org/10.48550/arxiv.2604.00007","title":"Dynin-Omni: Omnimodal Unified Large Diffusion Language Model","display_name":"Dynin-Omni: Omnimodal Unified Large Diffusion Language Model","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7148619165","doi":"https://doi.org/10.48550/arxiv.2604.00007"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.00007","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00007","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.00007","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132811416","display_name":"Jaeik Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kim, Jaeik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069745791","display_name":"Woojin Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Woojin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132806346","display_name":"Jihwan Hong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong, Jihwan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022380724","display_name":"Yejoon Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Yejoon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132818448","display_name":"Sieun Hyeon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hyeon, Sieun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110993195","display_name":"Mintaek Lim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lim, Mintaek","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103738672","display_name":"Yunseok Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Yunseok","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132754878","display_name":"Dogeun Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Dogeun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132768829","display_name":"Hoeun Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Hoeun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132773859","display_name":"Hyunggeun Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Hyunggeun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132823754","display_name":"Jaeyoung Do","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Do, Jaeyoung","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5132811416"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.878000020980835,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.878000020980835,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.02239999920129776,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.01720000058412552,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/unified-model","display_name":"Unified Model","score":0.660099983215332},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6154999732971191},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.564300000667572},{"id":"https://openalex.org/keywords/serialization","display_name":"Serialization","score":0.4474000036716461},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.420199990272522},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.3659000098705292},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.36489999294281006},{"id":"https://openalex.org/keywords/orchestration","display_name":"Orchestration","score":0.36340001225471497}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7864999771118164},{"id":"https://openalex.org/C45493050","wikidata":"https://www.wikidata.org/wiki/Q7884934","display_name":"Unified Model","level":2,"score":0.660099983215332},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6154999732971191},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.564300000667572},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4602999985218048},{"id":"https://openalex.org/C52723943","wikidata":"https://www.wikidata.org/wiki/Q1127410","display_name":"Serialization","level":2,"score":0.4474000036716461},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.420199990272522},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.3659000098705292},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.36489999294281006},{"id":"https://openalex.org/C199168358","wikidata":"https://www.wikidata.org/wiki/Q3367000","display_name":"Orchestration","level":3,"score":0.36340001225471497},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3610999882221222},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.35679998993873596},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3440999984741211},{"id":"https://openalex.org/C130727458","wikidata":"https://www.wikidata.org/wiki/Q1639109","display_name":"Coarticulation","level":3,"score":0.32030001282691956},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.30329999327659607},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.3012000024318695},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.2881999909877777},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.2694000005722046},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2655999958515167},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.25999999046325684},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.00007","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00007","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.00007","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00007","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,78],"present":[1],"Dynin-Omni,":[2],"the":[3,136],"first":[4],"masked-diffusion-based":[5],"omnimodal":[6,48,76,154],"foundation":[7,151],"model":[8],"that":[9,30,38],"unifies":[10],"text,":[11],"image,":[12],"and":[13,16,75,90,94,97,113,159,161],"speech":[14,95],"understanding":[15],"generation,":[17,160],"together":[18],"with":[19,41,71,128],"video":[20,92],"understanding,":[21,93],"within":[22],"a":[23,54,67,142,149],"single":[24],"architecture.":[25],"Unlike":[26],"autoregressive":[27],"unified":[28,36,123,143,156],"models":[29,37,124],"serialize":[31],"heterogeneous":[32],"modalities,":[33],"or":[34],"compositional":[35],"require":[39],"orchestration":[40],"external":[42],"modality-specific":[43,130],"decoders,":[44],"Dynin-Omni":[45,65,80,99],"natively":[46],"formulates":[47],"modeling":[49],"as":[50,141],"masked":[51,139],"diffusion":[52,140],"over":[53],"shared":[55],"discrete":[56],"token":[57],"space,":[58],"enabling":[59],"iterative":[60],"refinement":[61],"under":[62],"bidirectional":[63],"context.":[64],"adopts":[66],"multi-stage":[68],"training":[69],"strategy":[70],"model-merging-based":[72],"modality":[73],"expansion":[74],"alignment.":[77],"evaluate":[79],"across":[81],"19":[82],"multimodal":[83,163],"benchmarks":[84],"spanning":[85],"language":[86],"reasoning,":[87],"image":[88],"generation":[89],"editing,":[91],"recognition":[96],"synthesis.":[98],"achieves":[100],"87.6":[101],"on":[102,105,108,111,116],"GSM8K,":[103],"1733.6":[104],"MME-P,":[106],"61.4":[107],"VideoMME,":[109],"0.87":[110],"GenEval,":[112],"2.1":[114],"WER":[115],"LibriSpeech":[117],"test-clean,":[118],"consistently":[119],"outperforming":[120],"existing":[121],"open-source":[122],"while":[125],"remaining":[126],"competitive":[127],"strong":[129],"expert":[131],"systems.":[132],"These":[133],"results":[134],"demonstrate":[135],"potential":[137],"of":[138],"paradigm":[144],"for":[145,152],"any-to-any":[146],"modeling,":[147],"providing":[148],"flexible":[150],"real-time":[153],"systems,":[155],"cross-modal":[157],"retrieval":[158],"embodied":[162],"agents.":[164]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
