{"id":"https://openalex.org/W7159670352","doi":"https://doi.org/10.48550/arxiv.2604.27393","title":"MiniCPM-o 4.5: Towards Real-Time Full-Duplex Omni-Modal Interaction","display_name":"MiniCPM-o 4.5: Towards Real-Time Full-Duplex Omni-Modal Interaction","publication_year":2026,"publication_date":"2026-04-30","ids":{"openalex":"https://openalex.org/W7159670352","doi":"https://doi.org/10.48550/arxiv.2604.27393"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.27393","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27393","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.27393","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100862506","display_name":"Junbo Cui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cui, Junbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134969164","display_name":"Bokai Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Bokai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134934533","display_name":"Chongyi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Chongyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134986347","display_name":"Tianyu Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Tianyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062620840","display_name":"Wei-Yue Sun","orcid":"https://orcid.org/0000-0002-1953-7698"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Weiyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100297804","display_name":"Yingjing Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yingjing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134940799","display_name":"Tianran Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Tianran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134931554","display_name":"Zhihui He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Zhihui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134931733","display_name":"Wenshuo Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Wenshuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134986489","display_name":"Tianchi Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Tianchi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055247809","display_name":"Jiancheng Gui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gui, Jiancheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134958356","display_name":"Luoyuan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Luoyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134985628","display_name":"Xian Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Xian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134971558","display_name":"Fuwei Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Fuwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134931931","display_name":"Moye Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Moye","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134980016","display_name":"Zhuo Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Zhuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134949176","display_name":"Hanyu Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Hanyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134939776","display_name":"Qingxin Gui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gui, Qingxin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134969966","display_name":"Qingzhe Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Qingzhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134983340","display_name":"Yuyang Wen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Yuyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134974537","display_name":"Huiping Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Huiping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130852368","display_name":"Rongkang Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Rongkang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100378919","display_name":"Yaqi Zhang","orcid":"https://orcid.org/0000-0002-6902-337X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yaqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040731629","display_name":"H. X. Wei","orcid":"https://orcid.org/0000-0002-5559-119X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Hongliang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134996334","display_name":"Chi Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Chi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134940894","display_name":"You Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, You","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111082343","display_name":"Kechen Fang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Kechen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134968339","display_name":"Jie Zhou (28945)","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134962258","display_name":"Yuxuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yuxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134956542","display_name":"Guoyang Zeng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Guoyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014699953","display_name":"Chaojun Xiao","orcid":"https://orcid.org/0000-0001-6039-0942"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Chaojun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134996678","display_name":"Yankai Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Yankai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134962449","display_name":"Xu Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Xu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134929153","display_name":"Maosong Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Maosong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134988899","display_name":"Zhiyuan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zhiyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134960874","display_name":"Yuan Yao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Yuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":36,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.4260999858379364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.4260999858379364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.13809999823570251,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.13079999387264252,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6018000245094299},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5297999978065491},{"id":"https://openalex.org/keywords/multimodal-interaction","display_name":"Multimodal interaction","score":0.519599974155426},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5098999738693237},{"id":"https://openalex.org/keywords/multimodality","display_name":"Multimodality","score":0.49779999256134033},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4878999888896942},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4828999936580658},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.3714999854564667}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7843999862670898},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6018000245094299},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5843999981880188},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5297999978065491},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.519599974155426},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5098999738693237},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.49779999256134033},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4878999888896942},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4828999936580658},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.3714999854564667},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.36070001125335693},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3531000018119812},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3465999960899353},{"id":"https://openalex.org/C201025465","wikidata":"https://www.wikidata.org/wiki/Q11248500","display_name":"User experience design","level":2,"score":0.33329999446868896},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.28999999165534973},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2524000108242035}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.27393","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27393","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.27393","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27393","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"progress":[1],"in":[2,83,115,198,212],"multimodal":[3,28,86,97],"large":[4],"language":[5],"models":[6,57,70],"(MLLMs)":[7],"has":[8],"brought":[9],"AI":[10],"capabilities":[11],"from":[12,26,58],"static":[13],"offline":[14],"data":[15],"processing":[16],"to":[17,75,180],"real-time":[18,104,238],"streaming":[19,147],"interaction,":[20,98],"yet":[21],"they":[22],"still":[23,51],"remain":[24,71],"far":[25],"human-level":[27],"interaction.":[29,107],"The":[30,137],"key":[31,138],"bottlenecks":[32],"are":[33,50],"no":[34],"longer":[35],"modality":[36],"coverage":[37],"or":[38,126],"latency":[39],"alone,":[40],"but":[41],"the":[42,84,134,183,234],"interaction":[43,165,241],"paradigm":[44],"itself.":[45],"First,":[46],"perception":[47,173],"and":[48,112,153,174,176,215,231],"response":[49,175],"separated":[52],"into":[53,166],"alternating":[54],"phases,":[55],"preventing":[56],"incorporating":[59],"new":[60],"inputs":[61,152],"for":[62],"timely":[63],"adjustment":[64],"during":[65],"generation.":[66],"Second,":[67],"most":[68],"current":[69],"reactive,":[72],"responding":[73],"only":[74],"explicit":[76],"user":[77],"requests":[78],"instead":[79],"of":[80,133,189],"acting":[81],"proactively":[82],"evolving":[85],"environment.":[87],"We":[88],"present":[89],"MiniCPM-o":[90,141,192],"4.5,":[91],"our":[92],"latest":[93],"effort":[94],"towards":[95],"human-like":[96],"which":[99],"mitigates":[100],"these":[101],"gaps":[102],"by":[103,226],"full-duplex":[105,239],"omni-modal":[106,151,213,240],"It":[108,208],"can":[109,236],"see,":[110],"listen,":[111],"speak":[113],"simultaneously":[114],"real-time,":[116],"while":[117],"also":[118,209],"exhibiting":[119],"proactive":[120,178],"behaviors":[121],"such":[122],"as":[123],"issuing":[124],"reminders":[125],"comments":[127],"based":[128],"on":[129,242],"its":[130,206,227],"continuous":[131],"understanding":[132,214],"live":[135],"scene.":[136],"technique":[139],"behind":[140],"4.5":[142,193],"is":[143],"Omni-Flow,":[144],"a":[145,156,167,187],"unified":[146],"framework":[148],"that":[149],"aligns":[150],"outputs":[154],"along":[155],"shared":[157],"temporal":[158],"axis.":[159],"This":[160],"formulation":[161],"converts":[162],"conventional":[163],"turn-based":[164],"full-duplex,":[168],"time-aligned":[169],"process,":[170],"enabling":[171],"simultaneous":[172],"allowing":[177],"behavior":[179],"arise":[181],"within":[182],"same":[184],"framework.":[185],"With":[186],"total":[188],"9B":[190],"parameters,":[191],"approaches":[194],"Gemini":[195],"2.5":[196],"Flash":[197],"vision-language":[199],"capabilities,":[200],"delivering":[201],"state-of-the-art":[202],"open-source":[203],"performance":[204],"at":[205],"scale.":[207],"surpasses":[210],"Qwen3-Omni-30B-A3B":[211],"delivers":[216],"better":[217],"speech":[218],"generation,":[219],"with":[220,245],"significantly":[221],"higher":[222],"computation":[223],"efficiency.":[224],"Driven":[225],"efficient":[228],"architecture":[229],"design":[230],"inference":[232],"optimization,":[233],"model":[235],"perform":[237],"edge":[243],"devices":[244],"less":[246],"than":[247],"12GB":[248],"RAM":[249],"cost.":[250]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-02T00:00:00"}
