{"id":"https://openalex.org/W6930717756","doi":"https://doi.org/10.5281/zenodo.14877417","title":"Audio Prompt Adapter: Unleashing Music Editing Abilities for Text-to-Music With Lightweight Finetuning","display_name":"Audio Prompt Adapter: Unleashing Music Editing Abilities for Text-to-Music With Lightweight Finetuning","publication_year":2024,"publication_date":"2024-11-10","ids":{"openalex":"https://openalex.org/W6930717756","doi":"https://doi.org/10.5281/zenodo.14877417"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.14877417","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877417","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.14877417","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Fang Duo Tsai","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Fang Duo Tsai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Shih-Lun Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shih-Lun Wu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Haven Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haven Kim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Bo-Yu Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bo-Yu Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Hao-Chung Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao-Chung Cheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Yi-Hsuan Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi-Hsuan Yang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.46889306,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10051","display_name":"Asthma and respiratory diseases","score":0.49129998683929443,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10051","display_name":"Asthma and respiratory diseases","score":0.49129998683929443,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11456","display_name":"Neuroscience of respiration and sleep","score":0.15289999544620514,"subfield":{"id":"https://openalex.org/subfields/2807","display_name":"Endocrine and Autonomic Systems"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13338","display_name":"Pharmacological Effects and Assays","score":0.06310000270605087,"subfield":{"id":"https://openalex.org/subfields/1103","display_name":"Animal Science and Zoology"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/timbre","display_name":"Timbre","score":0.8194000124931335},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5235999822616577},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.45399999618530273},{"id":"https://openalex.org/keywords/adapter","display_name":"Adapter (computing)","score":0.4146000146865845},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.32019999623298645},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.31470000743865967}],"concepts":[{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.8194000124931335},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7797999978065491},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5309000015258789},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5235999822616577},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.4700999855995178},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.45399999618530273},{"id":"https://openalex.org/C177284502","wikidata":"https://www.wikidata.org/wiki/Q1005390","display_name":"Adapter (computing)","level":2,"score":0.4146000146865845},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.38499999046325684},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.32019999623298645},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.30309998989105225},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.28450000286102295},{"id":"https://openalex.org/C128422554","wikidata":"https://www.wikidata.org/wiki/Q20077126","display_name":"Sound recording and reproduction","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C2777946086","wikidata":"https://www.wikidata.org/wiki/Q1163335","display_name":"Music information retrieval","level":3,"score":0.27390000224113464},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.2529999911785126}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.14877417","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877417","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.14877417","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14877417","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6240912675857544}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Text-to-music":[0],"models":[1],"allow":[2],"users":[3,89],"to":[4,20,51,58,69,90],"generate":[5],"nearly":[6],"realistic":[7],"musical":[8],"audio":[9,30,108],"with":[10],"textual":[11],"commands.":[12],"However,":[13],"editing":[14],"music":[15],"audios":[16,140],"remains":[17],"challenging":[18],"due":[19],"the":[21,29,62,74,106],"conflicting":[22],"desiderata":[23],"of":[24,77,103],"performing":[25],"fine-grained":[26],"alterations":[27],"on":[28,123,138],"while":[31],"maintaining":[32],"a":[33,48,79,110],"simple":[34],"user":[35],"interface.":[36],"To":[37],"address":[38],"this":[39],"challenge,":[40],"we":[41,120,134],"propose":[42],"Audio":[43],"Prompt":[44],"Adapter":[45],"(or":[46],"AP-Adapter),":[47],"lightweight":[49],"addition":[50],"pretrained":[52],"text-to-music":[53,81],"models.":[54],"We":[55],"utilize":[56],"AudioMAE":[57],"extract":[59],"features":[60,72],"from":[61],"input":[63],"audio,":[64],"and":[65,96,98,109,117,130],"construct":[66],"attention-based":[67],"adapters":[68],"feed":[70],"these":[71],"into":[73],"internal":[75],"layers":[76],"AudioLDM2,":[78],"diffusion-based":[80],"model.":[82],"With":[83],"22M":[84],"trainable":[85],"parameters,":[86],"AP-Adapter":[87,122],"empowers":[88],"harness":[91],"both":[92],"global":[93],"(e.g.,":[94,100],"genre":[95,128],"timbre)":[97],"local":[99],"melody)":[101],"aspects":[102],"music,":[104],"using":[105],"original":[107],"short":[111],"text":[112],"as":[113],"inputs.":[114],"Through":[115],"objective":[116],"subjective":[118],"studies,":[119],"evaluate":[121],"three":[124],"tasks:":[125],"timbre":[126],"transfer,":[127,129],"accompaniment":[131],"generation.":[132],"Additionally,":[133],"demonstrate":[135],"its":[136],"effectiveness":[137],"out-of-domain":[139],"containing":[141],"unseen":[142],"instruments":[143],"during":[144],"training.":[145]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
