{"id":"https://openalex.org/W4391164151","doi":"https://doi.org/10.1109/taslp.2024.3358048","title":"Cross-Modal Interaction via Reinforcement Feedback for Audio-Lyrics Retrieval","display_name":"Cross-Modal Interaction via Reinforcement Feedback for Audio-Lyrics Retrieval","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4391164151","doi":"https://doi.org/10.1109/taslp.2024.3358048"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3358048","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/taslp.2024.3358048","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100639988","display_name":"Dong Zhou","orcid":"https://orcid.org/0000-0002-3310-8347"},"institutions":[{"id":"https://openalex.org/I186272606","display_name":"Guangdong University of Foreign Studies","ror":"https://ror.org/00fhc9y79","country_code":"CN","type":"education","lineage":["https://openalex.org/I186272606"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dong Zhou","raw_affiliation_strings":["School of Information Science and Technology, Guangdong University of Foreign Studies, Guangzhou, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, Guangdong University of Foreign Studies, Guangzhou, Guangdong, China","institution_ids":["https://openalex.org/I186272606"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115076807","display_name":"Fang Lei","orcid":"https://orcid.org/0009-0003-8661-1952"},"institutions":[{"id":"https://openalex.org/I121296143","display_name":"Hunan University of Science and Technology","ror":"https://ror.org/02m9vrb24","country_code":"CN","type":"education","lineage":["https://openalex.org/I121296143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fang Lei","raw_affiliation_strings":["School of Computer Science and Engineering, Hunan University of Science and Technology, Xiangtan, Hunan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Hunan University of Science and Technology, Xiangtan, Hunan, China","institution_ids":["https://openalex.org/I121296143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100412815","display_name":"Lin Li","orcid":"https://orcid.org/0000-0001-7553-6916"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Li","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, Hubei, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115076610","display_name":"Yongmei Zhou","orcid":"https://orcid.org/0000-0003-2661-3078"},"institutions":[{"id":"https://openalex.org/I186272606","display_name":"Guangdong University of Foreign Studies","ror":"https://ror.org/00fhc9y79","country_code":"CN","type":"education","lineage":["https://openalex.org/I186272606"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongmei Zhou","raw_affiliation_strings":["School of Information Science and Technology, Guangdong University of Foreign Studies, Guangzhou, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, Guangdong University of Foreign Studies, Guangzhou, Guangdong, China","institution_ids":["https://openalex.org/I186272606"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101732032","display_name":"Aimin Yang","orcid":"https://orcid.org/0009-0002-1751-4801"},"institutions":[{"id":"https://openalex.org/I154833797","display_name":"Lingnan Normal University","ror":"https://ror.org/01h6ecw13","country_code":"CN","type":"education","lineage":["https://openalex.org/I154833797"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Aimin Yang","raw_affiliation_strings":["School of Computer Science and Intelligence Education, Lingnan Normal University, Zhanjiang, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Intelligence Education, Lingnan Normal University, Zhanjiang, Guangdong, China","institution_ids":["https://openalex.org/I154833797"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100639988"],"corresponding_institution_ids":["https://openalex.org/I186272606"],"apc_list":null,"apc_paid":null,"fwci":0.7471,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.6433046,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"32","issue":null,"first_page":"1248","last_page":"1260"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lyrics","display_name":"Lyrics","score":0.8177968263626099},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7554711103439331},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6199313402175903},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6196337938308716},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6130630373954773},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6073789000511169},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.5064581632614136},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4505326747894287},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.42944806814193726},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4164063334465027},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.40251919627189636},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.36898839473724365},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3660135269165039}],"concepts":[{"id":"https://openalex.org/C2776436406","wikidata":"https://www.wikidata.org/wiki/Q602446","display_name":"Lyrics","level":2,"score":0.8177968263626099},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7554711103439331},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6199313402175903},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6196337938308716},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6130630373954773},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6073789000511169},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.5064581632614136},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4505326747894287},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.42944806814193726},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4164063334465027},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.40251919627189636},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36898839473724365},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3660135269165039},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3358048","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/taslp.2024.3358048","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7900000214576721}],"awards":[{"id":"https://openalex.org/G3104299295","display_name":null,"funder_award_id":"62376062","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6217162777","display_name":null,"funder_award_id":"2022JJ30020","funder_id":"https://openalex.org/F4320322843","funder_display_name":"Natural Science Foundation of\u00a0Hunan Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322843","display_name":"Natural Science Foundation of\u00a0Hunan Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1966247273","https://openalex.org/W2010486494","https://openalex.org/W2029163572","https://openalex.org/W2346298959","https://openalex.org/W2407555063","https://openalex.org/W2526050071","https://openalex.org/W2593116425","https://openalex.org/W2796207103","https://openalex.org/W2896457183","https://openalex.org/W2902907165","https://openalex.org/W2940092410","https://openalex.org/W2963367210","https://openalex.org/W2963435138","https://openalex.org/W2963997278","https://openalex.org/W2964051877","https://openalex.org/W3015591594","https://openalex.org/W3034419097","https://openalex.org/W3043840704","https://openalex.org/W3094496301","https://openalex.org/W3094550259","https://openalex.org/W3108240585","https://openalex.org/W3123348991","https://openalex.org/W3177151075","https://openalex.org/W3197988356","https://openalex.org/W3203004311","https://openalex.org/W3208601917","https://openalex.org/W4205605843","https://openalex.org/W4210913346","https://openalex.org/W4221157007","https://openalex.org/W4224933373","https://openalex.org/W4225307240","https://openalex.org/W4226292363","https://openalex.org/W4249992252","https://openalex.org/W4283384437","https://openalex.org/W4307646385","https://openalex.org/W4323644175","https://openalex.org/W4385245566","https://openalex.org/W6697272128","https://openalex.org/W6750041603","https://openalex.org/W6766582784","https://openalex.org/W6780316881","https://openalex.org/W6803701585","https://openalex.org/W6843663730"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W203959209","https://openalex.org/W2110287964","https://openalex.org/W2167701463","https://openalex.org/W4307407935"],"abstract_inverted_index":{"The":[0,186],"task":[1],"of":[2,159,175,208],"retrieving":[3,55],"audio":[4,67,177],"content":[5],"relevant":[6],"to":[7,27,116,145],"lyric":[8],"queries":[9],"and":[10,53,68,78,196,206],"vice":[11],"versa":[12],"plays":[13],"a":[14,43,71,102,112,166],"critical":[15],"role":[16],"in":[17,70,92],"music-oriented":[18],"applications.":[19],"In":[20,162],"this":[21],"process,":[22],"robust":[23],"feature":[24],"representations":[25,52,82,95,125],"have":[26],"be":[28,39],"learned":[29],"for":[30,96,181],"two":[31],"modalities.":[32],"Furthermore,":[33],"interactions":[34,65,79,110],"between":[35,56,66,80],"different":[36,57],"modalities":[37,58,128],"should":[38],"properly":[40],"captured":[41],"at":[42],"fine-grained":[44],"level.":[45],"Existing":[46],"approaches":[47,63],"can":[48],"effectively":[49],"extract":[50],"modal":[51],"perform":[54],"through":[59],"alignment.":[60],"However,":[61],"these":[62,142],"model":[64],"lyrics":[69,180],"coarse-grained":[72],"manner.":[73],"Especially":[74],"the":[75,85,148,157,182,191,197,204],"input":[76,150],"features":[77,151],"enhanced":[81],"produced":[83],"by":[84],"alignment":[86],"module":[87],"are":[88],"largely":[89],"ignored,":[90],"resulting":[91],"low-quality":[93],"modality":[94],"final":[97],"retrieval.":[98],"This":[99],"paper":[100],"presents":[101],"novel":[103,167],"method":[104],"named":[105],"CMRF":[106,209],"that":[107],"accomplishes":[108],"cross-modal":[109,132],"via":[111,129,152],"reinforcement":[113,153],"feedback":[114],"procedure":[115],"learn":[117],"high-quality":[118],"multi-modal":[119,160],"embeddings.":[120,161],"Initially,":[121],"we":[122,164],"implicitly":[123],"assimilate":[124],"across":[126],"distinct":[127],"directional":[130],"pairwise":[131],"attention.":[133],"Subsequently,":[134],"our":[135],"approach":[136],"recurrently":[137],"identifies":[138],"pivotal":[139],"constituents":[140],"within":[141],"elevated-level":[143],"attributes":[144],"engage":[146],"with":[147,178,212],"primary":[149],"learning,":[154],"thus":[155],"augmenting":[156],"quality":[158],"addition,":[163],"introduce":[165],"audio-lyrics":[168,183],"dataset":[169,195,199],"<italic":[170,192,200],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[171,193,201],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">AL-song</i>,":[172],"which":[173],"consists":[174],"paired":[176],"corresponding":[179],"retrieval":[184],"task.":[185],"empirical":[187],"findings":[188],"derived":[189],"from":[190],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">AL-song</i>":[194],"benchmark":[198],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Sounddescs</i>":[202],"substantiate":[203],"efficacy":[205],"efficiency":[207],"when":[210],"juxtaposed":[211],"state-of-the-art":[213],"methodologies.":[214]},"counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-12-25T23:11:45.687758","created_date":"2025-10-10T00:00:00"}
