{"id":"https://openalex.org/W7108678312","doi":"https://doi.org/10.5281/zenodo.17811428","title":"Video-Guided Text-to-Music Generation Using Public Domain Movie Collections","display_name":"Video-Guided Text-to-Music Generation Using Public Domain Movie Collections","publication_year":2025,"publication_date":"2025-09-21","ids":{"openalex":"https://openalex.org/W7108678312","doi":"https://doi.org/10.5281/zenodo.17811428"},"language":null,"primary_location":{"id":"doi:10.5281/zenodo.17811428","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811428","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.17811428","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Haven Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Haven Kim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zachary Novack","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zachary Novack","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Weihan Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weihan Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Julian McAuley","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Julian McAuley","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Hao-Wen Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao-Wen Dong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.59855684,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.334199994802475,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.334199994802475,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.1762000024318695,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.1467999964952469,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/public-domain","display_name":"Public domain","score":0.536300003528595},{"id":"https://openalex.org/keywords/mood","display_name":"Mood","score":0.4075999855995178},{"id":"https://openalex.org/keywords/wavelet","display_name":"Wavelet","score":0.3398999869823456},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.3287000060081482},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.3131999969482422}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6819000244140625},{"id":"https://openalex.org/C512654426","wikidata":"https://www.wikidata.org/wiki/Q19652","display_name":"Public domain","level":2,"score":0.536300003528595},{"id":"https://openalex.org/C2780733359","wikidata":"https://www.wikidata.org/wiki/Q331769","display_name":"Mood","level":2,"score":0.4075999855995178},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.37139999866485596},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.36090001463890076},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3571000099182129},{"id":"https://openalex.org/C47432892","wikidata":"https://www.wikidata.org/wiki/Q831390","display_name":"Wavelet","level":2,"score":0.3398999869823456},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.3287000060081482},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.313400000333786},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.3131999969482422},{"id":"https://openalex.org/C2993776861","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Open domain","level":3,"score":0.31299999356269836},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.301800012588501},{"id":"https://openalex.org/C2778739407","wikidata":"https://www.wikidata.org/wiki/Q165372","display_name":"CLIPS","level":2,"score":0.29829999804496765},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2946999967098236},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2590000033378601},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.17811428","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811428","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.17811428","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17811428","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"recent":[1],"advancements":[2],"in":[3,9,97,137,151],"music":[4,39,106],"generation":[5,107],"systems,":[6],"their":[7],"application":[8],"film":[10,105],"production":[11],"remains":[12],"limited,":[13],"as":[14,29],"they":[15],"struggle":[16],"to":[17],"capture":[18],"the":[19,48,92,99],"nuances":[20],"of":[21,50,71,94,101,139,143],"real-world":[22],"filmmaking,":[23],"where":[24],"filmmakers":[25],"consider":[26],"multiple":[27],"factors\u2014such":[28],"visual":[30],"content,":[31],"dialogue,":[32],"and":[33,86,145,148,153],"emotional":[34],"tone\u2014when":[35],"selecting":[36],"or":[37],"composing":[38],"for":[40],"a":[41,68,111],"scene.":[42],"This":[43],"limitation":[44],"primarily":[45],"stems":[46],"from":[47,74],"absence":[49],"comprehensive":[51],"datasets":[52],"that":[53,115,130],"integrate":[54],"these":[55],"elements.":[56],"To":[57,90],"address":[58],"this":[59],"gap,":[60],"we":[61,109],"introduce":[62,110],"Open":[63],"Screen":[64],"Sound":[65],"Library":[66],"(OSSL),":[67],"dataset":[69,96],"consisting":[70],"movie":[72],"clips":[73],"public":[75],"domain":[76],"films,":[77],"totaling":[78],"approximately":[79],"36.5":[80],"hours,":[81],"paired":[82,146],"with":[83],"high-quality":[84],"soundtracks":[85],"human-annotated":[87],"mood":[88,152],"information.":[89],"demonstrate":[91,129],"effectiveness":[93],"our":[95,131],"improving":[98],"performance":[100],"pre-trained":[102],"models":[103],"on":[104],"tasks,":[108],"new":[112],"video":[113],"adapter":[114],"enhances":[116,135],"an":[117],"autoregressive":[118],"transformer-based":[119],"text-to-music":[120],"model":[121],"by":[122],"adding":[123],"video-based":[124],"conditioning.":[125],"Our":[126],"experimental":[127],"results":[128],"proposed":[132],"approach":[133],"effectively":[134],"MusicGen-Medium":[136],"terms":[138],"both":[140],"objective":[141],"measures":[142],"distributional":[144],"fidelity,":[147],"subjective":[149],"compatibility":[150],"genre.":[154]},"counts_by_year":[],"updated_date":"2025-12-05T23:25:22.460635","created_date":"2025-12-05T00:00:00"}
