{"id":"https://openalex.org/W4415538079","doi":"https://doi.org/10.1145/3746027.3755810","title":"MAP: Parameter-Efficient Tuning for Referring Expression Comprehension via Multi-Modal Adaptive Positional Encoding","display_name":"MAP: Parameter-Efficient Tuning for Referring Expression Comprehension via Multi-Modal Adaptive Positional Encoding","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415538079","doi":"https://doi.org/10.1145/3746027.3755810"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755810","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755810","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053695187","display_name":"Ruilin Yao","orcid":"https://orcid.org/0009-0002-6654-2294"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ruilin Yao","raw_affiliation_strings":["Wuhan University of Technology, Wuhan, Hubei, China, Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences, Beijing, China, and Shanghai Artificial Intelligence Laboratory, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0002-6654-2294","affiliations":[{"raw_affiliation_string":"Wuhan University of Technology, Wuhan, Hubei, China, Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences, Beijing, China, and Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101967703","display_name":"Yi Rong","orcid":"https://orcid.org/0000-0003-4867-6811"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]},{"id":"https://openalex.org/I43922553","display_name":"Wuhan University of Science and Technology","ror":"https://ror.org/00e4hrk88","country_code":"CN","type":"education","lineage":["https://openalex.org/I43922553"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Rong","raw_affiliation_strings":["Wuhan University of Technology, Wuhan, Hubei, China and Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, Hainan, China"],"raw_orcid":"https://orcid.org/0000-0003-4867-6811","affiliations":[{"raw_affiliation_string":"Wuhan University of Technology, Wuhan, Hubei, China and Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, Hainan, China","institution_ids":["https://openalex.org/I43922553","https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077894997","display_name":"Tianyu Zou","orcid":null},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]},{"id":"https://openalex.org/I43922553","display_name":"Wuhan University of Science and Technology","ror":"https://ror.org/00e4hrk88","country_code":"CN","type":"education","lineage":["https://openalex.org/I43922553"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianyu Zou","raw_affiliation_strings":["Wuhan University of Technology, Wuhan, Hubei, China and Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, Hainan, China"],"raw_orcid":"https://orcid.org/0009-0007-1451-0019","affiliations":[{"raw_affiliation_string":"Wuhan University of Technology, Wuhan, Hubei, China and Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, Hainan, China","institution_ids":["https://openalex.org/I43922553","https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101440426","display_name":"Bo Zhang","orcid":"https://orcid.org/0009-0007-7302-4627"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]},{"id":"https://openalex.org/I43922553","display_name":"Wuhan University of Science and Technology","ror":"https://ror.org/00e4hrk88","country_code":"CN","type":"education","lineage":["https://openalex.org/I43922553"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Zhang","raw_affiliation_strings":["Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, Hainan, China and Wuhan University of Technology, Wuhan, Hubei, China"],"raw_orcid":"https://orcid.org/0009-0007-7302-4627","affiliations":[{"raw_affiliation_string":"Sanya Science and Education Innovation Park, Wuhan University of Technology, Sanya, Hainan, China and Wuhan University of Technology, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I43922553","https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jian Li","orcid":"https://orcid.org/0000-0002-0242-6481"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Li","raw_affiliation_strings":["Nanjing University, Nanjing, Jiangsu, China"],"raw_orcid":"https://orcid.org/0000-0002-0242-6481","affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101598103","display_name":"Shengwu Xiong","orcid":"https://orcid.org/0000-0002-3836-0664"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]},{"id":"https://openalex.org/I4210100789","display_name":"Wuhan College","ror":"https://ror.org/01dashf18","country_code":"CN","type":"nonprofit","lineage":["https://openalex.org/I4210100789"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengwu Xiong","raw_affiliation_strings":["Interdisciplinary Artificial Intelligence Research Institute, Wuhan College, Wuhan, Hubei, China","School of Mathematics and Statistics, Wuhan University of Technology, Wuhan, Hubei, China"],"raw_orcid":"https://orcid.org/0000-0002-3836-0664","affiliations":[{"raw_affiliation_string":"Interdisciplinary Artificial Intelligence Research Institute, Wuhan College, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I4210100789"]},{"raw_affiliation_string":"School of Mathematics and Statistics, Wuhan University of Technology, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003832579","display_name":"Shili Xiong","orcid":"https://orcid.org/0000-0003-1167-525X"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]},{"id":"https://openalex.org/I4210100789","display_name":"Wuhan College","ror":"https://ror.org/01dashf18","country_code":"CN","type":"nonprofit","lineage":["https://openalex.org/I4210100789"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shili Xiong","raw_affiliation_strings":["Interdisciplinary Artificial Intelligence Research Institute, Wuhan College, Wuhan, Hubei, China","School of Mathematics and Statistics, Wuhan University of Technology, Wuhan, Hubei, China"],"raw_orcid":"https://orcid.org/0000-0003-1167-525X","affiliations":[{"raw_affiliation_string":"Interdisciplinary Artificial Intelligence Research Institute, Wuhan College, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I4210100789"]},{"raw_affiliation_string":"School of Mathematics and Statistics, Wuhan University of Technology, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I196699116"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5053695187"],"corresponding_institution_ids":["https://openalex.org/I4210100255"],"apc_list":null,"apc_paid":null,"fwci":1.1332,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.83519595,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"2264","last_page":"2273"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5800999999046326},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5554999709129333},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.507099986076355},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5008999705314636},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.49869999289512634},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.46880000829696655},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.4668999910354614},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4447999894618988}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8237000107765198},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6518999934196472},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5800999999046326},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5554999709129333},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.507099986076355},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5008999705314636},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.49869999289512634},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.46880000829696655},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.4668999910354614},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4447999894618988},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4327000081539154},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.4154999852180481},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.40059998631477356},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.35429999232292175},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.33649998903274536},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3037000000476837},{"id":"https://openalex.org/C177284502","wikidata":"https://www.wikidata.org/wiki/Q1005390","display_name":"Adapter (computing)","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.28929999470710754},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2720000147819519},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755810","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755810","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6356431383","display_name":null,"funder_award_id":"2022ZD0160604","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1536680647","https://openalex.org/W1773149199","https://openalex.org/W2006147162","https://openalex.org/W2185175083","https://openalex.org/W2277195237","https://openalex.org/W2489434015","https://openalex.org/W2962764817","https://openalex.org/W2962766617","https://openalex.org/W2963109634","https://openalex.org/W2964345792","https://openalex.org/W2984121207","https://openalex.org/W2986755220","https://openalex.org/W2987734933","https://openalex.org/W3034772468","https://openalex.org/W3096609285","https://openalex.org/W3159619744","https://openalex.org/W4214490042","https://openalex.org/W4312651322","https://openalex.org/W4313145013","https://openalex.org/W4385571188","https://openalex.org/W4386071547","https://openalex.org/W4386076636","https://openalex.org/W4386212341","https://openalex.org/W4387272106","https://openalex.org/W4400275574","https://openalex.org/W4404011646"],"related_works":[],"abstract_inverted_index":{"This":[0,62],"paper":[1],"studies":[2],"the":[3,15,71,79,100,145],"challenging":[4],"task":[5],"of":[6,107,126,139,148],"Referring":[7],"Expression":[8],"Comprehension":[9],"(REC),":[10],"which":[11,98],"aims":[12],"at":[13],"detecting":[14],"text-referred":[16],"target":[17,80],"object":[18],"in":[19,58,67],"an":[20],"input":[21,140],"image.":[22],"To":[23,82],"achieve":[24],"this,":[25],"most":[26],"recent":[27],"works":[28],"attempt":[29],"to":[30,46,76,165,180],"adapt":[31],"powerful":[32],"pretrained":[33,56],"models":[34],"through":[35],"integrating":[36],"additional":[37],"structures":[38],"(e.g.,":[39],"low-rank":[40],"adaptation":[41],"(LoRA)":[42],"or":[43],"adapter":[44],"modules)":[45],"enable":[47],"efficient":[48],"parameter":[49],"tuning.":[50],"However,":[51],"all":[52],"these":[53],"methods":[54,183],"process":[55],"features":[57],"a":[59,87,104,124],"position-agnostic":[60],"manner.":[61],"will":[63],"limit":[64],"their":[65],"effectiveness":[66],"REC":[68,166],"tasks,":[69],"where":[70],"positional":[72,108,116,146],"information":[73,147],"is":[74,198],"essential":[75],"correctly":[77],"localize":[78],"object.":[81],"this":[83,143],"end,":[84],"we":[85],"propose":[86],"novel":[88],"parameter-efficient":[89,192],"tuning":[90,193],"approach,":[91],"named":[92],"Multi-Modal":[93],"Adaptive":[94],"Positional":[95],"Encoding":[96],"(MAP),":[97],"addresses":[99],"above":[101],"problem":[102],"from":[103,123],"new":[105],"perspective":[106],"encoding.":[109],"More":[110],"specifically,":[111],"MAP":[112,176],"first":[113],"generates":[114],"initial":[115],"embeddings":[117],"for":[118],"different":[119,149],"visual":[120],"encoder":[121],"layers":[122],"set":[125],"learnable":[127],"vectors,":[128],"and":[129,156,189],"then":[130],"adjusts":[131],"them":[132],"adaptively":[133],"based":[134],"on":[135,170],"spatial-wise":[136],"visual-linguistic":[137],"correlations":[138],"data.":[141],"In":[142],"way,":[144],"image":[150],"tokens":[151],"can":[152],"be":[153],"appropriately":[154],"modeled":[155],"utilized":[157],"by":[158],"MAP,":[159],"thus":[160],"making":[161],"it":[162],"more":[163],"applicable":[164],"tasks.":[167],"Extensive":[168],"experiments":[169],"five":[171],"widely-used":[172],"datasets":[173],"demonstrate":[174],"that":[175],"achieves":[177],"comparable":[178],"results":[179],"full":[181],"fine-tuning":[182],"with":[184],"much":[185],"fewer":[186],"extra":[187],"parameters":[188],"outperforms":[190],"other":[191],"approaches.":[194],"Our":[195],"source":[196],"code":[197],"available":[199],"at:":[200],"https://github.com/Mr-Bigworth/MAP.":[201]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-25T00:00:00"}
