{"id":"https://openalex.org/W4416036345","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.689","title":"MAVL: A Multilingual Audio-Video Lyrics Dataset for Animated Song Translation","display_name":"MAVL: A Multilingual Audio-Video Lyrics Dataset for Animated Song Translation","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416036345","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.689"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.689","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.689","pdf_url":"https://aclanthology.org/2025.emnlp-main.689.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.689.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113237138","display_name":"W Cho","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Woohyun Cho","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100337295","display_name":"Youngmin Kim","orcid":"https://orcid.org/0000-0002-1295-1837"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Youngmin Kim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113934521","display_name":"Sunghyun Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sunghyun Lee","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101881857","display_name":"Youngjae Yu","orcid":"https://orcid.org/0000-0002-5867-0782"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Youngjae Yu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5113237138"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.40752335,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"13651","last_page":"13679"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.7634999752044678,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.7634999752044678,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.03889999911189079,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.012299999594688416,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lyrics","display_name":"Lyrics","score":0.5965999960899353},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.4097000062465668},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.2955000102519989}],"concepts":[{"id":"https://openalex.org/C2776436406","wikidata":"https://www.wikidata.org/wiki/Q602446","display_name":"Lyrics","level":2,"score":0.5965999960899353},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.5471000075340271},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5101000070571899},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4578000009059906},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44339999556541443},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.4097000062465668},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.27799999713897705},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.26820001006126404},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.23639999330043793}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.689","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.689","pdf_url":"https://aclanthology.org/2025.emnlp-main.689.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.689","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.689","pdf_url":"https://aclanthology.org/2025.emnlp-main.689.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3034753964","display_name":null,"funder_award_id":"grant","funder_id":"https://openalex.org/F4320320671","funder_display_name":"National Research Foundation"},{"id":"https://openalex.org/G342704958","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G6072120315","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416036345.pdf","grobid_xml":"https://content.openalex.org/works/W4416036345.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Lyrics":[0,33],"translation":[1],"requires":[2],"both":[3],"accurate":[4],"semantic":[5],"transfer":[6],"and":[7,14,27,52,57,78,96],"preservation":[8],"of":[9,102],"musical":[10],"rhythm,":[11],"syllabic":[12,80],"structure,":[13],"poetic":[15],"style.In":[16],"animated":[17],"musicals,":[18],"the":[19,40,100],"challenge":[20],"intensifies":[21],"due":[22],"to":[23,82],"alignment":[24],"with":[25,71],"visual":[26],"auditory":[28],"cues.We":[29],"introduce":[30],"Multilingual":[31],"Audio-Video":[32,69],"Benchmark":[34],"for":[35,45,106],"Animated":[36],"Song":[37],"Translation":[38],"(MAVL),":[39],"first":[41],"multilingual,":[42],"multimodal":[43],"benchmark":[44],"singable":[46],"lyrics":[47,107],"translation.By":[48],"integrating":[49],"text,":[50],"audio,":[51],"video,":[53],"MAVL":[54],"enables":[55],"richer":[56],"more":[58],"expressive":[59],"translations":[60],"than":[61],"textonly":[62],"approaches.Building":[63],"on":[64],"this,":[65],"we":[66],"propose":[67],"Syllable-Constrained":[68],"LLM":[70],"Chain-of-Thought":[72],"(SylAVL-CoT),":[73],"which":[74],"leverages":[75],"audio-video":[76],"cues":[77],"enforces":[79],"constraints":[81],"produce":[83],"natural-sounding":[84],"lyrics.Experimental":[85],"results":[86],"demonstrate":[87],"that":[88],"SylAVL-CoT":[89],"significantly":[90],"outperforms":[91],"textbased":[92],"models":[93],"in":[94],"singability":[95],"contextual":[97],"accuracy,":[98],"emphasizing":[99],"value":[101],"multimodal,":[103],"multilingual":[104],"approaches":[105],"translation.":[108]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-11-08T00:00:00"}
