{"id":"https://openalex.org/W7138386969","doi":"https://doi.org/10.1609/aaai.v40i34.40153","title":"Explore How to Inject Beneficial Noise in MLLMs","display_name":"Explore How to Inject Beneficial Noise in MLLMs","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138386969","doi":"https://doi.org/10.1609/aaai.v40i34.40153"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i34.40153","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i34.40153","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40153/44114","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40153/44114","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129738739","display_name":"Ruishu Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ruishu Zhu","raw_affiliation_strings":["School of Artificial Intelligence, OPtics and ElectroNics (iOPEN), Northwestern Polytechnical University\nInstitute of Artificial Intelligence (TeleAI), China Telecom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, OPtics and ElectroNics (iOPEN), Northwestern Polytechnical University\nInstitute of Artificial Intelligence (TeleAI), China Telecom","institution_ids":["https://openalex.org/I4210164862"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129700547","display_name":"Sida Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Sida Huang","raw_affiliation_strings":["School of Artificial Intelligence, OPtics and ElectroNics (iOPEN), Northwestern Polytechnical University\nInstitute of Artificial Intelligence (TeleAI), China Telecom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, OPtics and ElectroNics (iOPEN), Northwestern Polytechnical University\nInstitute of Artificial Intelligence (TeleAI), China Telecom","institution_ids":["https://openalex.org/I4210164862"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100313883","display_name":"Ziheng Jiao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210160618","display_name":"Huawei Technologies (United Kingdom)","ror":"https://ror.org/056gzgs71","country_code":"GB","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210160618"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ziheng Jiao","raw_affiliation_strings":["HuaWei Technologies Co., Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"HuaWei Technologies Co., Ltd","institution_ids":["https://openalex.org/I4210160618"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129706596","display_name":"Hongyuan Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CA","HK"],"is_corresponding":false,"raw_author_name":"Hongyuan Zhang","raw_affiliation_strings":["Institute of Artificial Intelligence (TeleAI), China Telecom\nThe University of Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence (TeleAI), China Telecom\nThe University of Hong Kong","institution_ids":["https://openalex.org/I4210164862","https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":9.8252,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.96723647,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"40","issue":"34","first_page":"29150","last_page":"29158"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9197999835014343,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9197999835014343,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.025800000876188278,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.014100000262260437,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.7141000032424927},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.5871000289916992},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.5475000143051147},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5388000011444092},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5310999751091003},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5217999815940857},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.48669999837875366}],"concepts":[{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.7141000032424927},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6973000168800354},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.5871000289916992},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.5475000143051147},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5388000011444092},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5310999751091003},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5217999815940857},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.48669999837875366},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4361000061035156},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.37049999833106995},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3598000109195709},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.3472000062465668},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.3167000114917755},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3075000047683716},{"id":"https://openalex.org/C116822448","wikidata":"https://www.wikidata.org/wiki/Q1879301","display_name":"Noise control","level":3,"score":0.28369998931884766},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.2802000045776367},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.27900001406669617},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.275299996137619}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i34.40153","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i34.40153","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40153/44114","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i34.40153","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i34.40153","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40153/44114","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138386969.pdf","grobid_xml":"https://content.openalex.org/works/W7138386969.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0,56],"Large":[1],"Language":[2],"Models":[3],"(MLLMs)":[4],"have":[5],"played":[6],"an":[7],"increasingly":[8],"important":[9],"role":[10],"in":[11,98],"multimodal":[12,90],"intelligence.":[13],"However,":[14],"the":[15,69,75,113],"existing":[16,151],"fine-tuning":[17,34,63,148,152],"methods":[18,44],"often":[19],"ignore":[20],"cross-modal":[21,96,124],"heterogeneity,":[22],"limiting":[23],"their":[24],"full":[25,48,146],"potential.":[26],"In":[27],"this":[28,108],"work,":[29],"we":[30,73,87],"propose":[31],"a":[32,81,89],"novel":[33],"strategy":[35],"by":[36,64],"injecting":[37,65],"beneficial":[38,105],"random":[39],"noise,":[40],"which":[41,86],"outperforms":[42],"previous":[43],"and":[45,127,139,149],"even":[46],"surpasses":[47,145],"fine-tuning,":[49],"with":[50],"minimal":[51],"additional":[52,161],"parameters.":[53,162],"The":[54],"proposed":[55],"Noise":[57],"Generator":[58],"(MuNG)":[59],"enables":[60],"efficient":[61],"modality":[62],"customized":[66],"noise":[67,91,111],"into":[68,112],"frozen":[70],"MLLMs.":[71],"Specifically,":[72],"reformulate":[74],"reasoning":[76],"process":[77],"of":[78,110],"MLLMs":[79,114],"from":[80],"variational":[82],"inference":[83],"perspective,":[84],"upon":[85],"design":[88],"generator":[92],"that":[93,142],"dynamically":[94],"analyzes":[95],"relationships":[97],"image-text":[99],"pairs":[100],"to":[101,121,157],"generate":[102],"task":[103],"adaptive":[104],"noise.":[106],"Injecting":[107],"type":[109],"effectively":[115],"suppresses":[116],"irrelevant":[117],"semantic":[118],"components,":[119],"leading":[120],"significantly":[122],"improved":[123],"representation":[125],"alignment":[126],"enhanced":[128],"performance":[129],"on":[130,134],"downstream":[131],"tasks.":[132],"Experiments":[133],"two":[135],"mainstream":[136],"MLLMs,":[137],"QwenVL":[138],"LLaVA,":[140],"demonstrate":[141],"our":[143],"method":[144],"parameter":[147],"other":[150],"approaches,":[153],"while":[154],"requiring":[155],"adjustments":[156],"only":[158],"about":[159],"1~2%":[160]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-18T00:00:00"}
