{"id":"https://openalex.org/W4408564121","doi":"https://doi.org/10.1109/tcsvt.2025.3551766","title":"M2IST: Multi-Modal Interactive Side-Tuning for Efficient Referring Expression Comprehension","display_name":"M2IST: Multi-Modal Interactive Side-Tuning for Efficient Referring Expression Comprehension","publication_year":2025,"publication_date":"2025-03-18","ids":{"openalex":"https://openalex.org/W4408564121","doi":"https://doi.org/10.1109/tcsvt.2025.3551766"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2025.3551766","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3551766","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100624009","display_name":"Xuyang Liu","orcid":"https://orcid.org/0000-0002-0691-9371"},"institutions":[{"id":"https://openalex.org/I24185976","display_name":"Sichuan University","ror":"https://ror.org/011ashp19","country_code":"CN","type":"education","lineage":["https://openalex.org/I24185976"]},{"id":"https://openalex.org/I24201400","display_name":"Chengdu University of Information Technology","ror":"https://ror.org/01yxwrh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I24201400"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xuyang Liu","raw_affiliation_strings":["College of Electronics and Information Engineering, Sichuan University, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"College of Electronics and Information Engineering, Sichuan University, Chengdu, China","institution_ids":["https://openalex.org/I24201400","https://openalex.org/I24185976"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114244599","display_name":"Ting Liu","orcid":"https://orcid.org/0000-0003-2597-3020"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ting Liu","raw_affiliation_strings":["College of Systems Engineering, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Systems Engineering, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043876209","display_name":"Siteng Huang","orcid":"https://orcid.org/0000-0002-9735-1186"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siteng Huang","raw_affiliation_strings":["School of Engineering, Westlake University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yi Xin","orcid":"https://orcid.org/0009-0004-1113-6963"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Xin","raw_affiliation_strings":["State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053458494","display_name":"Yue Hu","orcid":"https://orcid.org/0000-0002-8115-7020"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yue Hu","raw_affiliation_strings":["College of Systems Engineering, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Systems Engineering, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Long Qin","orcid":"https://orcid.org/0000-0003-1245-6622"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Qin","raw_affiliation_strings":["College of Systems Engineering, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Systems Engineering, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100665183","display_name":"Donglin Wang","orcid":"https://orcid.org/0000-0003-1359-6440"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Donglin Wang","raw_affiliation_strings":["School of Engineering, Westlake University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042622978","display_name":"Yuanyuan Wu","orcid":"https://orcid.org/0000-0002-4761-9359"},"institutions":[{"id":"https://openalex.org/I31595395","display_name":"Chengdu University of Technology","ror":"https://ror.org/05pejbw21","country_code":"CN","type":"education","lineage":["https://openalex.org/I31595395"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanyuan Wu","raw_affiliation_strings":["College of Computer Science and Cyber Security (Pilot Software College), Chengdu University of Technology, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Cyber Security (Pilot Software College), Chengdu University of Technology, Chengdu, China","institution_ids":["https://openalex.org/I31595395"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063092133","display_name":"Honggang Chen","orcid":"https://orcid.org/0000-0002-0647-064X"},"institutions":[{"id":"https://openalex.org/I24185976","display_name":"Sichuan University","ror":"https://ror.org/011ashp19","country_code":"CN","type":"education","lineage":["https://openalex.org/I24185976"]},{"id":"https://openalex.org/I24201400","display_name":"Chengdu University of Information Technology","ror":"https://ror.org/01yxwrh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I24201400"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Honggang Chen","raw_affiliation_strings":["College of Electronics and Information Engineering, Sichuan University, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"College of Electronics and Information Engineering, Sichuan University, Chengdu, China","institution_ids":["https://openalex.org/I24201400","https://openalex.org/I24185976"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100624009"],"corresponding_institution_ids":["https://openalex.org/I24185976","https://openalex.org/I24201400"],"apc_list":null,"apc_paid":null,"fwci":16.4979,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.98781028,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"36","issue":"2","first_page":"1341","last_page":"1354"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9811999797821045,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9749000072479248,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6914263367652893},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6319082379341125},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.556517481803894},{"id":"https://openalex.org/keywords/comprehension","display_name":"Comprehension","score":0.43494173884391785},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4015684127807617},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16940534114837646},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.08648237586021423}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6914263367652893},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6319082379341125},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.556517481803894},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.43494173884391785},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4015684127807617},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16940534114837646},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.08648237586021423},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3551766","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3551766","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3394021395","display_name":null,"funder_award_id":"2024YFHZ0212","funder_id":"https://openalex.org/F4320336756","funder_display_name":"Tianjin Science and Technology Program"},{"id":"https://openalex.org/G3457895640","display_name":null,"funder_award_id":"62103425","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3740411353","display_name":null,"funder_award_id":"2022CDSN-15-SCU","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G4118848642","display_name":null,"funder_award_id":"SCU2023D062","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G7546200249","display_name":null,"funder_award_id":"62001316","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8092987286","display_name":null,"funder_award_id":"62306329","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8321679603","display_name":null,"funder_award_id":"2024ZYD0263","funder_id":"https://openalex.org/F4320336756","funder_display_name":"Tianjin Science and Technology Program"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null},{"id":"https://openalex.org/F4320336756","display_name":"Tianjin Science and Technology Program","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1566289585","https://openalex.org/W1861492603","https://openalex.org/W2194775991","https://openalex.org/W2251512949","https://openalex.org/W2489434015","https://openalex.org/W2770129969","https://openalex.org/W2946086442","https://openalex.org/W2962764817","https://openalex.org/W2963109634","https://openalex.org/W2964284374","https://openalex.org/W2964345792","https://openalex.org/W2984121207","https://openalex.org/W2987734933","https://openalex.org/W3034772468","https://openalex.org/W3096609285","https://openalex.org/W3110435696","https://openalex.org/W3115894062","https://openalex.org/W3163747765","https://openalex.org/W3174965650","https://openalex.org/W3203354307","https://openalex.org/W3207127495","https://openalex.org/W4210820868","https://openalex.org/W4214490042","https://openalex.org/W4221166385","https://openalex.org/W4226024706","https://openalex.org/W4304891068","https://openalex.org/W4309181071","https://openalex.org/W4312651322","https://openalex.org/W4313054169","https://openalex.org/W4318954130","https://openalex.org/W4382463911","https://openalex.org/W4385245566","https://openalex.org/W4386071687","https://openalex.org/W4386076600","https://openalex.org/W4386212341","https://openalex.org/W4387986753","https://openalex.org/W4389520681","https://openalex.org/W4390871886","https://openalex.org/W4392543666","https://openalex.org/W4392909058","https://openalex.org/W4393159185","https://openalex.org/W4393159845","https://openalex.org/W4399205998","https://openalex.org/W4401607425","https://openalex.org/W4402667118","https://openalex.org/W4402671894","https://openalex.org/W4402702928","https://openalex.org/W4402727018","https://openalex.org/W4402754134","https://openalex.org/W4402769251","https://openalex.org/W4403791725","https://openalex.org/W4404011646","https://openalex.org/W4404783315","https://openalex.org/W4409369745","https://openalex.org/W4415795571"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2373862202","https://openalex.org/W2388232445"],"abstract_inverted_index":{"Referring":[0],"expression":[1],"comprehension":[2],"(REC)":[3],"is":[4,152],"a":[5,10,18],"vision-language":[6,25,65,81,114],"task":[7],"to":[8,54,75,111],"locate":[9],"target":[11],"object":[12],"in":[13],"an":[14],"image":[15],"based":[16],"on":[17],"language":[19],"expression.":[20],"Fully":[21],"fine-tuning":[22,129],"general-purpose":[23],"pre-trained":[24,64,105],"foundation":[26,66,82],"models":[27],"for":[28,116],"REC":[29,55],"yields":[30],"impressive":[31],"performance":[32,45],"but":[33],"becomes":[34],"increasingly":[35],"costly.":[36],"Parameter-efficient":[37],"transfer":[38],"learning":[39],"(PETL)":[40],"methods":[41],"have":[42],"shown":[43],"strong":[44],"with":[46,93],"fewer":[47],"tunable":[48,137],"parameters.":[49],"However,":[50],"directly":[51],"applying":[52],"PETL":[53,132],"faces":[56],"two":[57],"challenges:":[58],"(1)":[59],"insufficient":[60],"multi-modal":[61],"interaction":[62],"between":[63],"models,":[67],"and":[68,108,130,142],"(2)":[69],"high":[70],"GPU":[71,140],"memory":[72],"usage":[73],"due":[74],"gradients":[76],"passing":[77],"through":[78],"the":[79,104],"heavy":[80],"models.":[83],"To":[84],"this":[85],"end,":[86],"we":[87,102],"present":[88],"M2IST:":[89],"Multi-Modal":[90,97],"Interactive":[91,98],"Side-Tuning":[92],"M3ISAs:":[94],"Mixture":[95],"of":[96],"Side-Adapters.":[99],"During":[100],"fine-tuning,":[101],"fix":[103],"uni-modal":[106],"encoders":[107],"update":[109],"M3ISAs":[110],"enable":[112],"efficient":[113],"alignment":[115],"REC.":[117],"Empirical":[118],"results":[119],"reveal":[120],"that":[121],"M2IST":[122],"achieves":[123],"better":[124],"performance-efficiency":[125],"trade-off":[126],"than":[127],"full":[128],"other":[131],"methods,":[133],"requiring":[134],"only":[135],"2.11%":[136],"parameters,":[138],"39.61%":[139],"memory,":[141],"63.46%":[143],"training":[144],"time":[145],"while":[146],"maintaining":[147],"competitive":[148],"performance.":[149],"Our":[150],"code":[151],"released":[153],"at":[154],"https://github.com/xuyang-liu16/M2IST.":[155]},"counts_by_year":[{"year":2025,"cited_by_count":7}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
