{"id":"https://openalex.org/W4403488669","doi":"https://doi.org/10.3233/faia240515","title":"MPT4LM: Multi-Modal Prompt Tuning Makes Pre-Trained Large Language Models Better Vision-Language Learners","display_name":"MPT4LM: Multi-Modal Prompt Tuning Makes Pre-Trained Large Language Models Better Vision-Language Learners","publication_year":2024,"publication_date":"2024-10-16","ids":{"openalex":"https://openalex.org/W4403488669","doi":"https://doi.org/10.3233/faia240515"},"language":"en","primary_location":{"id":"doi:10.3233/faia240515","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia240515","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia240515","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101237510","display_name":"Yongzhu Miao","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yongzhu Miao","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, P.R. China, miaoyz@nudt.edu.cn, tangjintao@nudt.edu.cn, shashali@nudt.edu.cn, tingwang@nudt.edu.cn"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, P.R. China, miaoyz@nudt.edu.cn, tangjintao@nudt.edu.cn, shashali@nudt.edu.cn, tingwang@nudt.edu.cn","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032168150","display_name":"Jintao Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jintao Tang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, P.R. China, miaoyz@nudt.edu.cn, tangjintao@nudt.edu.cn, shashali@nudt.edu.cn, tingwang@nudt.edu.cn"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, P.R. China, miaoyz@nudt.edu.cn, tangjintao@nudt.edu.cn, shashali@nudt.edu.cn, tingwang@nudt.edu.cn","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023621058","display_name":"Shasha Li","orcid":"https://orcid.org/0000-0002-7581-1612"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shasha Li","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, P.R. China, miaoyz@nudt.edu.cn, tangjintao@nudt.edu.cn, shashali@nudt.edu.cn, tingwang@nudt.edu.cn"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, P.R. China, miaoyz@nudt.edu.cn, tangjintao@nudt.edu.cn, shashali@nudt.edu.cn, tingwang@nudt.edu.cn","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100428026","display_name":"Ting Wang","orcid":"https://orcid.org/0000-0003-4927-5833"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ting Wang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, P.R. China, miaoyz@nudt.edu.cn, tangjintao@nudt.edu.cn, shashali@nudt.edu.cn, tingwang@nudt.edu.cn"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, P.R. China, miaoyz@nudt.edu.cn, tangjintao@nudt.edu.cn, shashali@nudt.edu.cn, tingwang@nudt.edu.cn","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101237510"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38869155,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.7138679623603821},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6582132577896118},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43518173694610596},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3419725298881531},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.057951778173446655}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.7138679623603821},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6582132577896118},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43518173694610596},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3419725298881531},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.057951778173446655},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia240515","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia240515","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia240515","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia240515","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Pre-trained":[0],"Large":[1],"Language":[2,90],"Models":[3,91],"(LLMs)":[4],"have":[5],"demonstrated":[6],"prominent":[7],"generalization":[8],"to":[9,15,28,61,126],"various":[10],"linguistic":[11],"tasks.":[12],"However,":[13],"due":[14],"the":[16,56,78,121,128,134,180],"inherent":[17],"modality":[18],"and":[19,41,47,77,104,132,147,158,190,193],"task":[20],"discrepancy,":[21],"parameter-efficient":[22,57],"transfer":[23],"learning":[24],"for":[25,44,89],"adapting":[26],"LLMs":[27,60],"vision-language":[29],"(VL)":[30],"tasks":[31,63],"remains":[32],"challenging,":[33],"which":[34],"may":[35],"struggle":[36],"with":[37,107,120,174],"excessive":[38],"extra":[39],"computation":[40,189],"data":[42,191],"expenditure":[43],"VL":[45,62,151],"pre-training":[46,68],"disconnection":[48],"between":[49,188],"multi-modal":[50,66,81,112,137],"representations.":[51],"This":[52,93],"paper":[53],"concentrates":[54],"on":[55,69],"adaptation":[58],"of":[59,80,136,179,182],"without":[64],"inflexible":[65],"alignment":[67],"additional":[70],"image-text":[71],"pairs.":[72],"Inspired":[73],"by":[74],"Instruction":[75,110],"Tuning":[76,88],"nature":[79],"representation":[82],"learning,":[83],"we":[84],"propose":[85],"Multi-modal":[86],"Prompt":[87],"(MPT4LM).":[92],"method":[94],"provides":[95],"text-relevant":[96],"visual":[97],"prompts":[98,113],"via":[99],"a":[100,185],"plug-and-play":[101],"Cross-Attention":[102],"module":[103],"integrates":[105],"them":[106],"textual":[108],"Learnable":[109],"as":[111],"into":[114],"LLMs.":[115],"We":[116,139],"further":[117],"assemble":[118],"MPT4LM":[119,141,167],"currently":[122],"prevalent":[123],"Adapter":[124],"approach":[125],"alleviate":[127],"trainable":[129],"parameter":[130],"scale":[131],"facilitate":[133],"collaboration":[135],"prompts.":[138],"evaluate":[140],"upon":[142],"two":[143,150],"representative":[144],"LLMs:":[145],"LLAMA-2":[146],"Flan-T5,":[148],"over":[149],"tasks:":[152],"Visual":[153,159],"Question":[154],"Answering":[155],"(VQAv2.0,":[156],"GQA)":[157],"Entailment":[160],"(SNLI-VE).":[161],"Extensive":[162],"experimental":[163],"results":[164],"reveal":[165],"that":[166],"achieves":[168],"state-of-the-art":[169],"performance":[170],"among":[171],"prompting":[172],"methods":[173],"only":[175],"fine-tuning":[176],"about":[177],"0.65%":[178],"parameters":[181],"backbones,":[183],"indicating":[184],"better":[186],"trade-off":[187],"overhead":[192],"model":[194],"performance.":[195],"Our":[196],"code":[197],"is":[198],"available":[199],"at:":[200],"https://github.com/YzM1a0/MPT4LM.":[201]},"counts_by_year":[],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
