{"id":"https://openalex.org/W7137990809","doi":"https://doi.org/10.1609/aaai.v40i3.37181","title":"SteerMusic: Enhanced Musical Consistency for Zero-shot Text-Guided and Personalized Music Editing","display_name":"SteerMusic: Enhanced Musical Consistency for Zero-shot Text-Guided and Personalized Music Editing","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137990809","doi":"https://doi.org/10.1609/aaai.v40i3.37181"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i3.37181","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i3.37181","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37181/41143","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37181/41143","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129730193","display_name":"Xinlei Niu","orcid":null},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Xinlei Niu","raw_affiliation_strings":["Australian National University, Canberra, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian National University, Canberra, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069612434","display_name":"Kin Wai Cheuk","orcid":"https://orcid.org/0000-0003-3213-8242"},"institutions":[{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kin Wai Cheuk","raw_affiliation_strings":["Sony AI, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony AI, Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129740627","display_name":"Jing Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jing Zhang","raw_affiliation_strings":["Australian National University, Canberra, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian National University, Canberra, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031225419","display_name":"Naoki Murata","orcid":"https://orcid.org/0000-0001-7418-5173"},"institutions":[{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Naoki Murata","raw_affiliation_strings":["Sony AI, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony AI, Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044626752","display_name":"Chieh-Hsin Lai","orcid":"https://orcid.org/0009-0009-3059-929X"},"institutions":[{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Chieh-Hsin Lai","raw_affiliation_strings":["Sony AI, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony AI, Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076690986","display_name":"Michele Mancusi","orcid":"https://orcid.org/0000-0003-1591-7076"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Michele Mancusi","raw_affiliation_strings":["Sony Europe B.V., Stuttgart, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony Europe B.V., Stuttgart, Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129687160","display_name":"Woosung Choi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Woosung Choi","raw_affiliation_strings":["Sony AI, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony AI, Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006089763","display_name":"Giorgio Fabbro","orcid":"https://orcid.org/0000-0002-6302-9033"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Giorgio Fabbro","raw_affiliation_strings":["Sony Europe B.V., Stuttgart, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony Europe B.V., Stuttgart, Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129683752","display_name":"Wei-Hsiang Liao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Wei-Hsiang Liao","raw_affiliation_strings":["Sony AI, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony AI, Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129091796","display_name":"Charles Patrick Martin","orcid":null},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Charles Patrick Martin","raw_affiliation_strings":["Australian National University, Canberra, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian National University, Canberra, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129742851","display_name":"Yuki Mitsufuji","orcid":null},"institutions":[{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuki Mitsufuji","raw_affiliation_strings":["Sony AI, Tokyo, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sony AI, Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5129730193"],"corresponding_institution_ids":["https://openalex.org/I118347636"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20833333,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"3","first_page":"2000","last_page":"2010"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.49239999055862427,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.49239999055862427,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.2240999937057495,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.07119999825954437,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6887000203132629},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.6194999814033508},{"id":"https://openalex.org/keywords/image-editing","display_name":"Image editing","score":0.4357999861240387},{"id":"https://openalex.org/keywords/video-editing","display_name":"Video editing","score":0.4099999964237213},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.38510000705718994},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music information retrieval","score":0.3774999976158142}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7592999935150146},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6887000203132629},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.6194999814033508},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.4357999861240387},{"id":"https://openalex.org/C2780310081","wikidata":"https://www.wikidata.org/wiki/Q1154312","display_name":"Video editing","level":2,"score":0.4099999964237213},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4023999869823456},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.38510000705718994},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.3774999976158142},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3337000012397766},{"id":"https://openalex.org/C88639978","wikidata":"https://www.wikidata.org/wiki/Q233861","display_name":"Musical notation","level":3,"score":0.3156999945640564},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.303600013256073},{"id":"https://openalex.org/C109568592","wikidata":"https://www.wikidata.org/wiki/Q207628","display_name":"Musical composition","level":3,"score":0.29420000314712524},{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.29339998960494995},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2842999994754791},{"id":"https://openalex.org/C73520026","wikidata":"https://www.wikidata.org/wiki/Q7229091","display_name":"Pop music automation","level":4,"score":0.2791000008583069},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.27070000767707825},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.2558000087738037}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i3.37181","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i3.37181","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37181/41143","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i3.37181","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i3.37181","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37181/41143","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7137990809.pdf","grobid_xml":"https://content.openalex.org/works/W7137990809.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Music":[0],"editing":[1,23,64,88,102,118,150,162],"is":[2,84],"an":[3],"important":[4],"step":[5],"in":[6,143],"music":[7,63,75,101,120,146,161],"production,":[8],"which":[9],"has":[10],"broad":[11],"applications,":[12],"including":[13],"game":[14],"development":[15],"and":[16,73,149],"film":[17],"production.":[18],"Most":[19],"existing":[20,141],"zero-shot":[21,87],"text-guided":[22],"methods":[24,37,65,139,158],"rely":[25],"on":[26],"pretrained":[27],"diffusion":[28,33],"models":[29],"by":[30,76,103,129],"involving":[31],"forward-backward":[32],"processes.":[34],"However,":[35],"these":[36],"often":[38],"struggle":[39],"to":[40,51],"preserve":[41],"the":[42,54,68,71,117,130],"musical":[43,112,123],"content.":[44],"Additionally,":[45],"text":[46,131],"instructions":[47,132],"alone":[48],"usually":[49],"fail":[50],"accurately":[52],"describe":[53],"desired":[55],"music.":[56],"In":[57],"this":[58],"paper,":[59],"we":[60],"propose":[61],"two":[62],"that":[66,108,125,137,156],"improve":[67],"consistency":[69,148],"between":[70],"original":[72],"edited":[74],"leveraging":[77],"score":[78],"distillation.":[79],"The":[80,94],"first":[81],"method,":[82,96],"SteerMusic,":[83],"a":[85,105,110],"coarse-grained":[86],"approach":[89],"using":[90],"delta":[91],"denoising":[92],"score.":[93],"second":[95],"SteerMusic+,":[97],"enables":[98],"fine-grained":[99],"personalized":[100],"manipulating":[104],"concept":[106],"token":[107],"represents":[109],"user-defined":[111,122],"style.":[113],"SteerMusic+":[114],"allows":[115],"for":[116],"of":[119],"into":[121],"styles":[124],"cannot":[126],"be":[127],"achieved":[128],"alone.":[133],"Experimental":[134],"results":[135],"show":[136],"our":[138,157],"outperform":[140],"approaches":[142],"preserving":[144],"both":[145],"content":[147],"fidelity.":[151],"User":[152],"studies":[153],"further":[154],"validate":[155],"achieve":[159],"superior":[160],"quality.":[163]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-03-18T00:00:00"}
