{"id":"https://openalex.org/W4410771963","doi":"https://doi.org/10.1109/icasspw65056.2025.11011167","title":"Singing Voice Accompaniment Data Augmentation with Generative Models","display_name":"Singing Voice Accompaniment Data Augmentation with Generative Models","publication_year":2025,"publication_date":"2025-04-06","ids":{"openalex":"https://openalex.org/W4410771963","doi":"https://doi.org/10.1109/icasspw65056.2025.11011167"},"language":"en","primary_location":{"id":"doi:10.1109/icasspw65056.2025.11011167","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icasspw65056.2025.11011167","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084590912","display_name":"Miguel Perez","orcid":"https://orcid.org/0000-0002-5117-791X"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Pompeu Fabra University","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Miguel Perez","raw_affiliation_strings":["Universitat Pompeu Fabra,Barcelona,Spain"],"affiliations":[{"raw_affiliation_string":"Universitat Pompeu Fabra,Barcelona,Spain","institution_ids":["https://openalex.org/I170486558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074883610","display_name":"Holger Kirchhoff","orcid":"https://orcid.org/0009-0008-4655-729X"},"institutions":[{"id":"https://openalex.org/I4210166625","display_name":"Huawei German Research Center","ror":"https://ror.org/00z59w514","country_code":"DE","type":"facility","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210129353","https://openalex.org/I4210166625"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Holger Kirchhoff","raw_affiliation_strings":["Huawei Munich Research Center,Munich,Germany"],"affiliations":[{"raw_affiliation_string":"Huawei Munich Research Center,Munich,Germany","institution_ids":["https://openalex.org/I4210166625"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078194825","display_name":"Peter Grosche","orcid":"https://orcid.org/0000-0002-5525-5233"},"institutions":[{"id":"https://openalex.org/I4210166625","display_name":"Huawei German Research Center","ror":"https://ror.org/00z59w514","country_code":"DE","type":"facility","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210129353","https://openalex.org/I4210166625"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Peter Grosche","raw_affiliation_strings":["Huawei Munich Research Center,Munich,Germany"],"affiliations":[{"raw_affiliation_string":"Huawei Munich Research Center,Munich,Germany","institution_ids":["https://openalex.org/I4210166625"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006479715","display_name":"Xavier Serra","orcid":"https://orcid.org/0000-0003-1395-2345"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Pompeu Fabra University","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Xavier Serra","raw_affiliation_strings":["Universitat Pompeu Fabra,Barcelona,Spain"],"affiliations":[{"raw_affiliation_string":"Universitat Pompeu Fabra,Barcelona,Spain","institution_ids":["https://openalex.org/I170486558"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5084590912"],"corresponding_institution_ids":["https://openalex.org/I170486558"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05962715,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.8916622400283813},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6845122575759888},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5603576898574829},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5569263100624084},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4446990191936493},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.23686686158180237},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.23174136877059937}],"concepts":[{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.8916622400283813},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6845122575759888},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5603576898574829},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5569263100624084},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4446990191936493},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.23686686158180237},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23174136877059937},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icasspw65056.2025.11011167","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icasspw65056.2025.11011167","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","raw_type":"proceedings-article"},{"id":"pmh:oai:repositori-api.upf.edu:10230/71522","is_oa":false,"landing_page_url":"http://hdl.handle.net/10230/71522","pdf_url":null,"source":{"id":"https://openalex.org/S4306402615","display_name":"Repositori digital de la UPF (Universitat Pompeu Fabra)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I170486558","host_organization_name":"Universitat Pompeu Fabra","host_organization_lineage":["https://openalex.org/I170486558"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/acceptedVersion"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W2127141656","https://openalex.org/W2994884786","https://openalex.org/W3011176162","https://openalex.org/W3037149862","https://openalex.org/W3160649916","https://openalex.org/W3213726885","https://openalex.org/W4221159539","https://openalex.org/W4225281045","https://openalex.org/W4293363567","https://openalex.org/W4311187172","https://openalex.org/W4372260026","https://openalex.org/W4372260250","https://openalex.org/W4372260308","https://openalex.org/W4392705114","https://openalex.org/W4392909491","https://openalex.org/W4400033239","https://openalex.org/W4405882659","https://openalex.org/W6697015887","https://openalex.org/W6780218876","https://openalex.org/W6848578254","https://openalex.org/W6853096648","https://openalex.org/W6855932524","https://openalex.org/W6856125517"],"related_works":["https://openalex.org/W4365211920","https://openalex.org/W3014948380","https://openalex.org/W4391584540","https://openalex.org/W4380551139","https://openalex.org/W4317695495","https://openalex.org/W4395044357","https://openalex.org/W4287117424","https://openalex.org/W4387506531","https://openalex.org/W2087346071","https://openalex.org/W2967848559"],"abstract_inverted_index":{"Singing":[0],"voice":[1],"transcription":[2],"is":[3,38,51],"a":[4,19,64],"key":[5],"task":[6],"in":[7,26],"Music":[8],"Information":[9],"Retrieval":[10],"(MIR)":[11],"that":[12,70,97,112,122],"focuses":[13],"on":[14,31],"identifying":[15],"sung":[16],"notes":[17],"within":[18],"music":[20,73],"audio":[21,74,106],"segment.":[22],"Advancing":[23],"state-of-the-art":[24],"methods":[25],"this":[27,60],"area":[28],"relies":[29],"heavily":[30],"high-quality":[32],"data,":[33],"yet":[34],"annotating":[35],"such":[36],"data":[37,49,67],"resource-intensive":[39],"and":[40,56],"requires":[41],"musical":[42,83],"expertise.":[43],"In":[44,59],"genres":[45],"like":[46],"pop":[47],"music,":[48],"sharing":[50],"further":[52],"complicated":[53],"by":[54,124],"copyright":[55],"distribution":[57],"limitations.":[58],"paper,":[61],"we":[62,81],"refine":[63],"recently":[65],"proposed":[66],"augmentation":[68],"technique":[69],"leverages":[71],"AI-generated":[72],"to":[75],"address":[76],"these":[77],"data-related":[78],"challenges.":[79],"Specifically,":[80],"create":[82],"accompaniments":[84],"for":[85],"vocals":[86],"with":[87,126],"known":[88],"target":[89],"notes,":[90],"enabling":[91],"the":[92,99],"generation":[93],"of":[94],"new":[95],"mixes":[96,115],"retain":[98],"original":[100],"piece\u2019s":[101],"harmony":[102],"while":[103],"introducing":[104],"substantial":[105],"variation.":[107],"Our":[108],"cross-dataset":[109],"experiments":[110],"reveal":[111],"using":[113],"harmony-matched":[114],"improves":[116],"generalization,":[117],"though":[118],"performance":[119],"remains":[120],"below":[121],"achieved":[123],"training":[125],"additional":[127],"real":[128],"data.":[129]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
