{"id":"https://openalex.org/W4417296283","doi":"https://doi.org/10.1145/3757374.3771475","title":"Video2Song: Video-to-Song Generation via Retrieval-Augmented Prompt Generation","display_name":"Video2Song: Video-to-Song Generation via Retrieval-Augmented Prompt Generation","publication_year":2025,"publication_date":"2025-12-13","ids":{"openalex":"https://openalex.org/W4417296283","doi":"https://doi.org/10.1145/3757374.3771475"},"language":null,"primary_location":{"id":"doi:10.1145/3757374.3771475","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3757374.3771475","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Posters","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120792211","display_name":"Zijiao Yin","orcid":"https://orcid.org/0009-0003-7832-7126"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zijiao Yin","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China and MAIS, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-7832-7126","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China and MAIS, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102871105","display_name":"Sifei Li","orcid":"https://orcid.org/0009-0003-9331-4593"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sifei Li","raw_affiliation_strings":["MAIS, Institute of Automation, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-9331-4593","affiliations":[{"raw_affiliation_string":"MAIS, Institute of Automation, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069298091","display_name":"Weiming Dong","orcid":"https://orcid.org/0000-0001-6502-145X"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiming Dong","raw_affiliation_strings":["MAIS, Institute of Automation, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-6502-145X","affiliations":[{"raw_affiliation_string":"MAIS, Institute of Automation, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5120792211"],"corresponding_institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.45712384,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"2"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.21619999408721924,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.21619999408721924,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.188400000333786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.15970000624656677,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.7171000242233276},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.6226000189781189},{"id":"https://openalex.org/keywords/controllability","display_name":"Controllability","score":0.5292999744415283},{"id":"https://openalex.org/keywords/natural-language-generation","display_name":"Natural language generation","score":0.5282999873161316},{"id":"https://openalex.org/keywords/rhythm","display_name":"Rhythm","score":0.42239999771118164},{"id":"https://openalex.org/keywords/video-recording","display_name":"Video recording","score":0.3522999882698059}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7368999719619751},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.7171000242233276},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.6226000189781189},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.5292999744415283},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.5282999873161316},{"id":"https://openalex.org/C135343436","wikidata":"https://www.wikidata.org/wiki/Q170406","display_name":"Rhythm","level":2,"score":0.42239999771118164},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37959998846054077},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3700000047683716},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36039999127388},{"id":"https://openalex.org/C2988634675","wikidata":"https://www.wikidata.org/wiki/Q34508","display_name":"Video recording","level":2,"score":0.3522999882698059},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.32850000262260437},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.29319998621940613},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C3019307219","wikidata":"https://www.wikidata.org/wiki/Q5476245","display_name":"Fourth generation","level":3,"score":0.2624000012874603},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2547000050544739},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2515999972820282},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3757374.3771475","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3757374.3771475","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Posters","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":1,"referenced_works":["https://openalex.org/W2970641574"],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"introduce":[4],"an":[5],"end-to-end":[6],"framework":[7,47],"for":[8],"generating":[9],"video":[10,29,66],"soundtracks":[11],"that":[12],"enhances":[13],"content":[14],"alignment":[15,71],"and":[16,31,39,72,81],"controllability":[17],"compared":[18],"to":[19,27,35,54],"existing":[20],"approaches.":[21],"Leveraging":[22],"a":[23,43,49,56],"video-language":[24],"model":[25,52],"(VLM)":[26],"interpret":[28],"semantics":[30],"retrieval-augmented":[32],"generation":[33],"(RAG)":[34],"extract":[36],"compatible":[37],"style":[38],"tempo":[40],"cues":[41],"from":[42],"music":[44,57],"corpus,":[45],"our":[46],"conditions":[48],"large":[50],"language":[51],"(LLM)":[53],"guide":[55],"generator":[58],"in":[59],"producing":[60],"high-fidelity":[61],"soundtracks.":[62],"Evaluations":[63],"across":[64],"diverse":[65],"genres":[67],"demonstrate":[68],"superior":[69],"semantic":[70],"rhythmic":[73],"synchronization,":[74],"as":[75,77],"well":[76],"the":[78],"framework\u2019s":[79],"effectiveness":[80],"generalizability.":[82],"Project":[83],"page:":[84],"https://github.com/zijiao1108/Video2Song.":[85]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-12-13T00:00:00"}
