{"id":"https://openalex.org/W4403791622","doi":"https://doi.org/10.1145/3664647.3681492","title":"CoPL:Parameter-Efficient Collaborative Prompt Learning for Audio-Visual Tasks","display_name":"CoPL:Parameter-Efficient Collaborative Prompt Learning for Audio-Visual Tasks","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791622","doi":"https://doi.org/10.1145/3664647.3681492"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681492","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681492","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100981232","display_name":"Yihan Zhao","orcid":"https://orcid.org/0000-0002-8182-8437"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yihan Zhao","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050052141","display_name":"Wei Xi","orcid":"https://orcid.org/0000-0001-9348-2982"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Xi","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102384729","display_name":"Yuhang Cui","orcid":"https://orcid.org/0009-0001-9649-8098"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhang Cui","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024580488","display_name":"Gairui Bai","orcid":"https://orcid.org/0000-0003-0012-5068"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gairui Bai","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064781553","display_name":"Xinhui Liu","orcid":"https://orcid.org/0000-0002-4690-7076"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinhui Liu","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101471775","display_name":"Jizhong Zhao","orcid":"https://orcid.org/0000-0002-6520-8238"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jizhong Zhao","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100981232"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":0.7332,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.70761099,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"4455","last_page":"4464"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.8392260074615479},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8170952200889587},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.4629128575325012},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.43313083052635193},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.39322003722190857},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3700595498085022}],"concepts":[{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.8392260074615479},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8170952200889587},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.4629128575325012},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.43313083052635193},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.39322003722190857},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3700595498085022}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681492","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681492","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W2619697695","https://openalex.org/W2931433835","https://openalex.org/W2963680395","https://openalex.org/W2964109005","https://openalex.org/W2967197225","https://openalex.org/W3154807520","https://openalex.org/W3174906557","https://openalex.org/W3175300676","https://openalex.org/W3176445421","https://openalex.org/W3198377975","https://openalex.org/W4212774754","https://openalex.org/W4226206782","https://openalex.org/W4226442948","https://openalex.org/W4283709432","https://openalex.org/W4285300583","https://openalex.org/W4304098310","https://openalex.org/W4312310776","https://openalex.org/W4312349930","https://openalex.org/W4312380001","https://openalex.org/W4312415534","https://openalex.org/W4312460555","https://openalex.org/W4312651322","https://openalex.org/W4312884055","https://openalex.org/W4313123347","https://openalex.org/W4379116896","https://openalex.org/W4382202615","https://openalex.org/W4382465573","https://openalex.org/W4386047824","https://openalex.org/W4386065620","https://openalex.org/W4386071547","https://openalex.org/W4386075532","https://openalex.org/W4386113246","https://openalex.org/W4387086487","https://openalex.org/W4388192054","https://openalex.org/W4390871786","https://openalex.org/W4390872773","https://openalex.org/W4390874728","https://openalex.org/W4393159092","https://openalex.org/W4393160420","https://openalex.org/W6797613833"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W3137890128","https://openalex.org/W1984634519","https://openalex.org/W4245955731","https://openalex.org/W2393726419"],"abstract_inverted_index":{"Parameter-Efficient":[0],"Fine":[1],"Tuning":[2],"(PEFT)":[3],"has":[4],"been":[5],"demonstrated":[6],"to":[7,16,23,66,99,110],"be":[8],"effective":[9],"and":[10,70,81],"efficient":[11],"for":[12,32,53,89],"transferring":[13],"foundation":[14],"models":[15,22],"downstream":[17,25,135],"tasks.":[18,55],"Transferring":[19],"pretrained":[20],"uni-modal":[21,69],"multi-modal":[24,34,42,54,71],"tasks":[26,136],"helps":[27],"alleviate":[28],"substantial":[29],"computational":[30],"costs":[31],"retraining":[33],"models.":[35],"However,":[36],"existing":[37],"approaches":[38],"primarily":[39],"focus":[40],"on":[41,132],"fusion,":[43],"while":[44,93,137],"neglecting":[45],"the":[46,74,94,116],"modal-specific":[47,79,85],"fine-tuning,":[48],"which":[49],"is":[50,108],"also":[51],"crucial":[52],"To":[56],"this":[57],"end,":[58],"we":[59],"propose":[60],"parameter-efficient":[61],"Collaborative":[62],"Prompt":[63],"Learning":[64],"(CoPL)":[65],"fine-tune":[67],"both":[68],"features.":[72],"Specifically,":[73],"collaborative":[75],"prompts":[76,80,86,96],"consist":[77],"of":[78],"modal-interaction":[82,95],"prompts.":[83],"The":[84],"are":[87,97],"tailored":[88],"fine-tuning":[90],"each":[91],"modality,":[92],"customized":[98],"explore":[100],"inter-modality":[101],"association.":[102],"Furthermore,":[103],"prompt":[104],"bank-based":[105],"mutual":[106],"coupling":[107],"introduced":[109],"extract":[111],"instance-level":[112],"features,":[113],"further":[114],"enhancing":[115],"model's":[117],"generalization":[118],"ability.":[119],"Extensive":[120],"experimental":[121],"results":[122],"demonstrate":[123],"that":[124],"our":[125],"approach":[126],"achieves":[127],"comparable":[128],"or":[129],"higher":[130],"performance":[131],"various":[133],"audio-visual":[134],"utilizing":[138],"approximately":[139],"1%":[140],"extra":[141],"trainable":[142],"parameters.":[143]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
