{"id":"https://openalex.org/W4413120455","doi":"https://doi.org/10.1109/tai.2025.3596925","title":"Prompt-Aware Adapter: Learning Adaptive Visual Tokens for Multimodal Large Language Models","display_name":"Prompt-Aware Adapter: Learning Adaptive Visual Tokens for Multimodal Large Language Models","publication_year":2025,"publication_date":"2025-08-08","ids":{"openalex":"https://openalex.org/W4413120455","doi":"https://doi.org/10.1109/tai.2025.3596925"},"language":"en","primary_location":{"id":"doi:10.1109/tai.2025.3596925","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2025.3596925","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yue Zhang","orcid":"https://orcid.org/0000-0002-0431-6390"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yue Zhang","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University, Zhejiang, China"],"raw_orcid":"https://orcid.org/0000-0002-0431-6390","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University, Zhejiang, China","institution_ids":["https://openalex.org/I168879160"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hehe Fan","orcid":"https://orcid.org/0000-0001-9572-2345"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hehe Fan","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University, Zhejiang, China"],"raw_orcid":"https://orcid.org/0000-0001-9572-2345","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University, Zhejiang, China","institution_ids":["https://openalex.org/I168879160"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101397422","display_name":"Wei Ji","orcid":"https://orcid.org/0000-0002-8106-9768"},"institutions":[{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]},{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]},{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Ji","raw_affiliation_strings":["School of Intelligence Science and Technology, Nanjing University, Suzhou, Jiangsu, China","School of Intelligence Science and Technology, Nanjing University, Jiangsu, Suzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-8106-9768","affiliations":[{"raw_affiliation_string":"School of Intelligence Science and Technology, Nanjing University, Suzhou, Jiangsu, China","institution_ids":["https://openalex.org/I308837","https://openalex.org/I881766915"]},{"raw_affiliation_string":"School of Intelligence Science and Technology, Nanjing University, Jiangsu, Suzhou, China","institution_ids":["https://openalex.org/I308837","https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020006712","display_name":"Yongkang Wong","orcid":"https://orcid.org/0000-0002-1239-4428"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yongkang Wong","raw_affiliation_strings":["School of Computing, National University of Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-1239-4428","affiliations":[{"raw_affiliation_string":"School of Computing, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058575315","display_name":"Roger Zimmermann","orcid":"https://orcid.org/0000-0002-7410-2590"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Roger Zimmermann","raw_affiliation_strings":["School of Computing, National University of Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-7410-2590","affiliations":[{"raw_affiliation_string":"School of Computing, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005421447","display_name":"Yi Yang","orcid":"https://orcid.org/0000-0002-0512-880X"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Yang","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University, Zhejiang, China"],"raw_orcid":"https://orcid.org/0000-0002-0512-880X","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University, Zhejiang, China","institution_ids":["https://openalex.org/I168879160"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I168879160"],"apc_list":null,"apc_paid":null,"fwci":4.1623,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.94438164,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"7","issue":"3","first_page":"1355","last_page":"1364"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9440000057220459,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9409999847412109,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adapter","display_name":"Adapter (computing)","score":0.8317487835884094},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7157588005065918},{"id":"https://openalex.org/keywords/language-acquisition","display_name":"Language acquisition","score":0.4309547245502472},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41900891065597534},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3799898028373718},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3449854254722595},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33482879400253296},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.18782392144203186},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.10583668947219849}],"concepts":[{"id":"https://openalex.org/C177284502","wikidata":"https://www.wikidata.org/wiki/Q1005390","display_name":"Adapter (computing)","level":2,"score":0.8317487835884094},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7157588005065918},{"id":"https://openalex.org/C74672266","wikidata":"https://www.wikidata.org/wiki/Q815859","display_name":"Language acquisition","level":2,"score":0.4309547245502472},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41900891065597534},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3799898028373718},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3449854254722595},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33482879400253296},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.18782392144203186},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.10583668947219849},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tai.2025.3596925","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tai.2025.3596925","pdf_url":null,"source":{"id":"https://openalex.org/S4210169448","display_name":"IEEE Transactions on Artificial Intelligence","issn_l":"2691-4581","issn":["2691-4581"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3765385956","display_name":null,"funder_award_id":"62472381","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1773149199","https://openalex.org/W1861492603","https://openalex.org/W2470673105","https://openalex.org/W2489434015","https://openalex.org/W2560730294","https://openalex.org/W2886641317","https://openalex.org/W2963109634","https://openalex.org/W2963518342","https://openalex.org/W3004268082","https://openalex.org/W3035588244","https://openalex.org/W3038528491","https://openalex.org/W4200474852","https://openalex.org/W4205474609","https://openalex.org/W4312846625","https://openalex.org/W4386076522","https://openalex.org/W4400803523","https://openalex.org/W4401043272","https://openalex.org/W4402753980","https://openalex.org/W4402952414","https://openalex.org/W4410536678","https://openalex.org/W4416707474","https://openalex.org/W7108068597"],"related_works":["https://openalex.org/W2133028525","https://openalex.org/W4229060448","https://openalex.org/W4306381730","https://openalex.org/W2981692913","https://openalex.org/W3044188621","https://openalex.org/W3184035966","https://openalex.org/W2485605994","https://openalex.org/W2160602540","https://openalex.org/W4385571108","https://openalex.org/W4200251711"],"abstract_inverted_index":{"To":[0,79],"bridge":[1],"the":[2,39,46,58,63,69,92,101,105,119,125,138,162],"gap":[3],"between":[4],"vision":[5],"and":[6,60,113,130,144,158],"language":[7],"modalities,":[8],"Multimodal":[9],"Large":[10,26],"Language":[11,27],"Models":[12,28],"(MLLMs)":[13],"usually":[14],"learn":[15],"an":[16],"adapter":[17],"that":[18],"converts":[19],"visual":[20,35,97,122,146,151],"inputs":[21,98],"to":[22,54,94,117,142],"understandable":[23],"tokens":[24],"for":[25,72],"(LLMs).":[29],"However,":[30],"most":[31,120],"adapters":[32,50,88,109],"generate":[33],"consistent":[34],"tokens,":[36],"regardless":[37],"of":[38,42,104,140,164],"specific":[40,102],"objects":[41],"interest":[43],"mentioned":[44],"in":[45,57],"prompt.":[47,106],"Since":[48],"these":[49],"distribute":[51],"equal":[52],"attention":[53],"every":[55],"detail":[56],"image":[59],"focus":[61,103],"on":[62,100,149],"entire":[64],"scene,":[65],"they":[66],"may":[67],"increase":[68],"cognitive":[70],"load":[71],"LLMs,":[73],"particularly":[74],"when":[75],"processing":[76],"complex":[77],"scenes.":[78],"alleviate":[80],"this":[81],"problem,":[82],"we":[83],"propose":[84],"prompt-aware":[85,108,165],"adapters.":[86,166],"These":[87],"are":[89],"designed":[90],"with":[91],"capability":[93],"dynamically":[95],"embed":[96],"based":[99],"Specifically,":[107],"utilize":[110],"both":[111,128],"global":[112],"local":[114],"textual":[115],"features":[116],"capture":[118],"relevant":[121],"clues":[123],"from":[124],"prompt":[126],"at":[127],"coarse":[129],"fine":[131],"granularity":[132],"levels.":[133],"This":[134],"approach":[135],"significantly":[136],"enhances":[137],"ability":[139],"LLMs":[141],"understand":[143],"interpret":[145],"content.":[147],"Experiments":[148],"various":[150],"question":[152],"answering":[153],"tasks,":[154],"such":[155],"as":[156],"counting":[157],"position":[159],"reasoning,":[160],"demonstrate":[161],"effectiveness":[163],"Code":[167],"is":[168],"at:":[169],"https://github.com/YueCheong/prompt-aware-adapter.":[170]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3}],"updated_date":"2026-05-15T08:27:34.491423","created_date":"2025-10-10T00:00:00"}
