{"id":"https://openalex.org/W4412889611","doi":"https://doi.org/10.18653/v1/2025.acl-long.1514","title":"A Parameter-Efficient and Fine-Grained Prompt Learning for Vision-Language Models","display_name":"A Parameter-Efficient and Fine-Grained Prompt Learning for Vision-Language Models","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412889611","doi":"https://doi.org/10.18653/v1/2025.acl-long.1514"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.acl-long.1514","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-long.1514","pdf_url":"https://aclanthology.org/2025.acl-long.1514.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.acl-long.1514.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035480317","display_name":"Yongbin Guo","orcid":"https://orcid.org/0000-0002-0337-6762"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yongbin Guo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101646382","display_name":"Shuzhen Li","orcid":"https://orcid.org/0000-0002-8336-092X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shuzhen Li","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029578429","display_name":"Zhulin Liu","orcid":"https://orcid.org/0000-0003-4145-823X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhulin Liu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100378750","display_name":"Tong Zhang","orcid":"https://orcid.org/0000-0001-5818-4285"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tong Zhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100643265","display_name":"C. L. Philip Chen","orcid":"https://orcid.org/0000-0001-5451-7230"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"C.L.Philip Chen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9349,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.77192982,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"31346","last_page":"31359"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9778000116348267,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7399202585220337},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4555742144584656},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4347150921821594}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7399202585220337},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4555742144584656},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4347150921821594}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.acl-long.1514","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-long.1514","pdf_url":"https://aclanthology.org/2025.acl-long.1514.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.acl-long.1514","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-long.1514","pdf_url":"https://aclanthology.org/2025.acl-long.1514.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1487701234","display_name":null,"funder_award_id":"62222603","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3852078645","display_name":null,"funder_award_id":"STI2030","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412889611.pdf","grobid_xml":"https://content.openalex.org/works/W4412889611.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Current":[0],"vision-language":[1,51,144],"models":[2,52],"(VLMs)":[3],"understand":[4],"complex":[5,128,169],"vision-text":[6,129],"tasks":[7],"by":[8],"extracting":[9,17],"overall":[10],"semantic":[11,25,57,122,136],"information":[12,67],"from":[13,18,75],"largescale":[14],"cross-modal":[15,20],"associations.However,":[16],"large-scale":[19],"associations":[21],"often":[22],"smooths":[23],"out":[24],"details":[26],"and":[27,36,78,88,154,164],"requires":[28],"large":[29],"computations,":[30],"limiting":[31],"multimodal":[32],"fine-grained":[33,55,103],"understanding":[34,126],"performance":[35],"efficiency.To":[37],"address":[38],"this":[39,41],"issue,":[40],"paper":[42],"proposes":[43],"a":[44],"detail-oriented":[45,92,109],"prompt":[46,87,90,93,110],"learning":[47],"(DoPL)":[48],"method":[49],"for":[50,112,143],"to":[53,64,84,101,116],"implement":[54,117],"multi-modal":[56,170],"alignment":[58,82,105,137],"with":[59,138],"merely":[60],"0.25M":[61],"trainable":[62],"parameters.According":[63],"the":[65,97,149,162],"low-entropy":[66],"concentration":[68],"theory,":[69],"DoPL":[70,107,167],"explores":[71],"shared":[72],"interest":[73],"tokens":[74],"text-vision":[76,104],"correlations":[77],"transforms":[79],"them":[80],"into":[81],"weights":[83],"enhance":[85],"text":[86],"vision":[89],"via":[91],"generation.It":[94],"effectively":[95],"guides":[96],"current":[98],"frozen":[99,114],"layer":[100,115],"extract":[102],"cues.Furthermore,":[106],"constructs":[108],"generation":[111],"each":[113],"layer-by-layer":[118],"localization":[119],"of":[120,166],"finegrained":[121,135],"alignment,":[123],"achieving":[124],"precise":[125],"in":[127,133,168],"tasks.DoPL":[130],"performs":[131],"well":[132],"parameter-efficient":[134,151],"only":[139],"0.12%":[140],"tunable":[141],"parameters":[142],"models.The":[145],"state-of-the-art":[146],"results":[147],"over":[148],"previous":[150],"fine-tuning":[152,156],"methods":[153],"full":[155],"approaches":[157],"on":[158],"six":[159],"benchmarks":[160],"demonstrate":[161],"effectiveness":[163],"efficiency":[165],"tasks.F":[171],"L":[172],"I":[173],"C":[174],"K":[175,179],"R":[176],"3":[177],"0":[178],"9":[180,184,187,190,193,194],"8":[181,185,188,191],".":[182,195],"2":[183],".4":[186],".6":[189],".8":[192]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
