{"id":"https://openalex.org/W4409129229","doi":"https://doi.org/10.1109/tmm.2025.3557700","title":"Cross-Modality Prompts: Few-Shot Multi-Label Recognition With Single-Label Training","display_name":"Cross-Modality Prompts: Few-Shot Multi-Label Recognition With Single-Label Training","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4409129229","doi":"https://doi.org/10.1109/tmm.2025.3557700"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2025.3557700","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3557700","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zixuan Ding","orcid":"https://orcid.org/0000-0002-2362-6802"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zixuan Ding","raw_affiliation_strings":["State Key Laboratory of Electromechanical Integrated Manufacturing of High-Performance Electronic Equipments, Xidian University, Xi'an, China","School of Mechano-Electronic Engineering, Xidian University, Xi&#x0027;an, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Electromechanical Integrated Manufacturing of High-Performance Electronic Equipments, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"School of Mechano-Electronic Engineering, Xidian University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zihan Zhou","orcid":"https://orcid.org/0009-0007-3658-1002"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zihan Zhou","raw_affiliation_strings":["School of Software, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100334231","display_name":"Hui Chen","orcid":"https://orcid.org/0000-0003-4180-5801"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Chen","raw_affiliation_strings":["Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China","Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019176668","display_name":"Tianxiang Hao","orcid":"https://orcid.org/0000-0002-1952-6083"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianxiang Hao","raw_affiliation_strings":["School of Software, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102630950","display_name":"Yizhe Xiong","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yizhe Xiong","raw_affiliation_strings":["School of Software, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051149140","display_name":"Sicheng Zhao","orcid":"https://orcid.org/0000-0001-5843-6411"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sicheng Zhao","raw_affiliation_strings":["Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China","Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Qiang Zhang","orcid":"https://orcid.org/0000-0002-2828-9905"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiang Zhang","raw_affiliation_strings":["State Key Laboratory of Electromechanical Integrated Manufacturing of High-Performance Electronic Equipments, Xidian University, Xi'an, China","School of Mechano-Electronic Engineering, Xidian University, Xi&#x0027;an, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Electromechanical Integrated Manufacturing of High-Performance Electronic Equipments, Xidian University, Xi'an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"School of Mechano-Electronic Engineering, Xidian University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046605531","display_name":"Jungong Han","orcid":"https://orcid.org/0000-0003-4361-956X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jungong Han","raw_affiliation_strings":["Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China","Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I149594827"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02815651,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"27","issue":null,"first_page":"3023","last_page":"3033"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.948199987411499,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.948199987411499,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9004999995231628,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8400928974151611},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.5670754313468933},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5520164370536804},{"id":"https://openalex.org/keywords/multi-label-classification","display_name":"Multi-label classification","score":0.5311644673347473},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5145198702812195},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.43816059827804565},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.428615927696228},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.34729257225990295}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8400928974151611},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.5670754313468933},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5520164370536804},{"id":"https://openalex.org/C2776482837","wikidata":"https://www.wikidata.org/wiki/Q3553958","display_name":"Multi-label classification","level":2,"score":0.5311644673347473},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5145198702812195},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.43816059827804565},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.428615927696228},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.34729257225990295},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3557700","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3557700","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G338250432","display_name":null,"funder_award_id":"62441235","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G826746073","display_name":null,"funder_award_id":"62271281","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2007972815","https://openalex.org/W2250539671","https://openalex.org/W2783837693","https://openalex.org/W2943605315","https://openalex.org/W2964105864","https://openalex.org/W2970476646","https://openalex.org/W2982247743","https://openalex.org/W2987401211","https://openalex.org/W3035524453","https://openalex.org/W3035682985","https://openalex.org/W3044438666","https://openalex.org/W3087020912","https://openalex.org/W3094724482","https://openalex.org/W3095388829","https://openalex.org/W3097217077","https://openalex.org/W3108975329","https://openalex.org/W3119727976","https://openalex.org/W3135126802","https://openalex.org/W3160420592","https://openalex.org/W3173908982","https://openalex.org/W3174159092","https://openalex.org/W3185341429","https://openalex.org/W3197599296","https://openalex.org/W3198377975","https://openalex.org/W3202424564","https://openalex.org/W3207580420","https://openalex.org/W3215828051","https://openalex.org/W4205991051","https://openalex.org/W4212944375","https://openalex.org/W4214608919","https://openalex.org/W4312480274","https://openalex.org/W4386065712","https://openalex.org/W4386071547","https://openalex.org/W4386076454","https://openalex.org/W4386113259","https://openalex.org/W4390203943","https://openalex.org/W4390872773","https://openalex.org/W4391791458","https://openalex.org/W4400905856","https://openalex.org/W4402783842","https://openalex.org/W4407151317","https://openalex.org/W6717697761","https://openalex.org/W6736057607","https://openalex.org/W6748555532","https://openalex.org/W6751281049","https://openalex.org/W6753311412","https://openalex.org/W6755766585","https://openalex.org/W6758126075","https://openalex.org/W6759807521","https://openalex.org/W6779693928","https://openalex.org/W6784333009","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6797716411","https://openalex.org/W6800480908","https://openalex.org/W6802386650","https://openalex.org/W6810580206"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W2366718574","https://openalex.org/W2359774528","https://openalex.org/W4298312966","https://openalex.org/W2325697621"],"abstract_inverted_index":{"Few-shot":[0],"multi-label":[1,92],"recognition":[2],"(FS-MLR)":[3],"presents":[4],"a":[5,46,133],"significant":[6],"challenge":[7],"due":[8],"to":[9,12,16,25,61,73],"the":[10,27,33,63,76,109,129],"need":[11],"assign":[13],"multiple":[14],"labels":[15],"images":[17],"with":[18,132],"limited":[19,66],"examples.":[20],"Existing":[21],"methods":[22,54],"often":[23],"struggle":[24],"balance":[26],"learning":[28],"of":[29,35,65,112,136],"novel":[30,47],"classes":[31],"and":[32,91,101,119,138,147],"retention":[34],"knowledge":[36],"from":[37,99],"base":[38],"classes.":[39],"To":[40],"address":[41],"this":[42],"issue,":[43],"we":[44],"propose":[45],"Cross-Modality":[48],"Prompts":[49],"(CMP)":[50],"approach.":[51],"Unlike":[52],"conventional":[53],"that":[55,124],"rely":[56],"on":[57,105,128],"additional":[58],"semantic":[59],"information":[60],"mitigate":[62],"impact":[64],"samples,":[67],"our":[68,113],"approach":[69],"leverages":[70],"multimodal":[71],"prompts":[72],"adaptively":[74],"tune":[75],"feature":[77],"extraction":[78],"network.":[79],"A":[80],"new":[81],"FS-MLR":[82],"benchmark":[83,96],"is":[84],"also":[85],"proposed,":[86],"which":[87],"includes":[88],"single-label":[89],"training":[90],"testing,":[93],"accompanied":[94],"by":[95],"datasets":[97,107],"constructed":[98],"MS-COCO":[100,130],"NUS-WIDE.":[102],"Extensive":[103],"experiments":[104],"these":[106],"demonstrate":[108],"superior":[110],"performance":[111],"CMP":[114,125],"approach,":[115],"highlighting":[116],"its":[117],"effectiveness":[118],"adaptability.":[120],"Our":[121],"results":[122],"show":[123],"outperforms":[126],"CoOp":[127],"dataset":[131],"maximal":[134],"improvement":[135],"19.47%":[137],"23.94%":[139],"in":[140],"mAP<sub":[141],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[142],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">harmonic</sub>":[143],"for":[144],"5-way":[145,148],"1-shot":[146],"5-shot":[149],"settings,":[150],"respectively.":[151]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
