{"id":"https://openalex.org/W4408354438","doi":"https://doi.org/10.1109/icassp49660.2025.10890072","title":"Aligned Contrastive Learning for Text-to-Music Retrieval","display_name":"Aligned Contrastive Learning for Text-to-Music Retrieval","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408354438","doi":"https://doi.org/10.1109/icassp49660.2025.10890072"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10890072","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890072","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113730169","display_name":"Tatsuya Komatsu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096607","display_name":"Line Corporation (Japan)","ror":"https://ror.org/00qg8pm87","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210096607","https://openalex.org/I60922564"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Tatsuya Komatsu","raw_affiliation_strings":["LY Corporation,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"LY Corporation,Tokyo,Japan","institution_ids":["https://openalex.org/I4210096607"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109021921","display_name":"Hokuto Munakata","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096607","display_name":"Line Corporation (Japan)","ror":"https://ror.org/00qg8pm87","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210096607","https://openalex.org/I60922564"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hokuto Munakata","raw_affiliation_strings":["LY Corporation,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"LY Corporation,Tokyo,Japan","institution_ids":["https://openalex.org/I4210096607"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036067847","display_name":"Takuya Hasumi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096607","display_name":"Line Corporation (Japan)","ror":"https://ror.org/00qg8pm87","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210096607","https://openalex.org/I60922564"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takuya Hasumi","raw_affiliation_strings":["LY Corporation,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"LY Corporation,Tokyo,Japan","institution_ids":["https://openalex.org/I4210096607"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044818016","display_name":"Yusuke Fujita","orcid":"https://orcid.org/0000-0002-6523-8146"},"institutions":[{"id":"https://openalex.org/I4210096607","display_name":"Line Corporation (Japan)","ror":"https://ror.org/00qg8pm87","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210096607","https://openalex.org/I60922564"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yusuke Fujita","raw_affiliation_strings":["LY Corporation,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"LY Corporation,Tokyo,Japan","institution_ids":["https://openalex.org/I4210096607"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5113730169"],"corresponding_institution_ids":["https://openalex.org/I4210096607"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05615229,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13996","display_name":"Diverse Musicological Studies","score":0.955299973487854,"subfield":{"id":"https://openalex.org/subfields/1210","display_name":"Music"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.927299976348877,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.784133791923523},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5955832600593567},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4845377802848816},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4837491512298584},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.41777247190475464},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.08615940809249878}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.784133791923523},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5955832600593567},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4845377802848816},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4837491512298584},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.41777247190475464},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.08615940809249878},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10890072","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890072","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5600000023841858}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1558543783","https://openalex.org/W2027518030","https://openalex.org/W2116373735","https://openalex.org/W2154016477","https://openalex.org/W2905804369","https://openalex.org/W2981571772","https://openalex.org/W3162583214","https://openalex.org/W4221157007","https://openalex.org/W4225314222","https://openalex.org/W4225328971","https://openalex.org/W4372259760","https://openalex.org/W4372260310","https://openalex.org/W4372266552","https://openalex.org/W6633499030","https://openalex.org/W6715395060","https://openalex.org/W6765174816","https://openalex.org/W6770649906","https://openalex.org/W6778572914","https://openalex.org/W6781425395","https://openalex.org/W6791353385","https://openalex.org/W6803752119","https://openalex.org/W6804120432","https://openalex.org/W6843026064"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"This":[0,71],"paper":[1],"proposes":[2],"aligned":[3],"contrastive":[4],"learning":[5],"for":[6],"text-to-music":[7],"retrieval.":[8],"The":[9],"proposed":[10],"method":[11,43],"introduces":[12],"a":[13,92,102,106],"new":[14],"similarity":[15,52,65],"measure,":[16],"'aligned":[17],"similarity',":[18],"which":[19],"captures":[20],"the":[21,45,49,58,68,74,112,124],"frame-level":[22],"and":[23,28,39,61,81,127],"token-level":[24],"correspondence":[25],"within":[26],"text":[27,46,82],"audio":[29,59,80,117],"sequences.":[30],"Unlike":[31],"traditional":[32],"approaches":[33],"that":[34,83],"aggregate":[35],"sequence":[36,60],"into":[37,91],"clip-level":[38],"sentence-level":[40],"embeddings,":[41],"our":[42],"aligns":[44],"token":[47],"exhibiting":[48],"highest":[50],"cosine":[51],"with":[53,101,120],"each":[54],"temporal":[55],"frame":[56],"of":[57,76],"averages":[62],"these":[63],"maximum":[64],"values":[66],"across":[67],"entire":[69],"sequence.":[70],"approach":[72],"enables":[73],"capture":[75],"fine-grained":[77],"relationships":[78],"between":[79],"are":[84,89],"often":[85],"overlooked":[86],"when":[87],"sequences":[88],"aggregated":[90],"single":[93],"embedding.":[94],"Retrieval":[95],"experiments":[96],"show":[97],"significant":[98],"performance":[99],"improvements,":[100],"notable":[103],"gain":[104],"being":[105],"17.8%":[107],"increase":[108],"in":[109],"Recall@5.":[110],"Moreover,":[111],"alignment":[113],"elucidates":[114],"how":[115],"specific":[116],"frames":[118],"correlate":[119],"textual":[121],"tokens,":[122],"enhancing":[123],"model's":[125],"transparency":[126],"interpretability.":[128]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
