{"id":"https://openalex.org/W4372260516","doi":"https://doi.org/10.1109/icassp49357.2023.10096670","title":"Continuous Descriptor-Based Control for Deep Audio Synthesis","display_name":"Continuous Descriptor-Based Control for Deep Audio Synthesis","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372260516","doi":"https://doi.org/10.1109/icassp49357.2023.10096670"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096670","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096670","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006813412","display_name":"Ninon Devis","orcid":null},"institutions":[{"id":"https://openalex.org/I4389425508","display_name":"Sciences et Technologies de la Musique et du Son","ror":"https://ror.org/025xvn046","country_code":null,"type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I2802818602","https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I4210102700","https://openalex.org/I4389425508"]},{"id":"https://openalex.org/I35345632","display_name":"Institut de Recherche et Coordination Acoustique Musique","ror":"https://ror.org/0121jnt59","country_code":"FR","type":"education","lineage":["https://openalex.org/I35345632"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Ninon Devis","raw_affiliation_strings":["IRCAM - Sorbonne Universit&#x00E9;, CNRS UMR 9912,Paris,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IRCAM - Sorbonne Universit&#x00E9;, CNRS UMR 9912,Paris,France","institution_ids":["https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I4389425508","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062722091","display_name":"Nils Demerl\u00e9","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4389425508","display_name":"Sciences et Technologies de la Musique et du Son","ror":"https://ror.org/025xvn046","country_code":null,"type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I2802818602","https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I4210102700","https://openalex.org/I4389425508"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]},{"id":"https://openalex.org/I35345632","display_name":"Institut de Recherche et Coordination Acoustique Musique","ror":"https://ror.org/0121jnt59","country_code":"FR","type":"education","lineage":["https://openalex.org/I35345632"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Nils Demerl\u00e9","raw_affiliation_strings":["IRCAM - Sorbonne Universit&#x00E9;, CNRS UMR 9912,Paris,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IRCAM - Sorbonne Universit&#x00E9;, CNRS UMR 9912,Paris,France","institution_ids":["https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I4389425508","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090798567","display_name":"Sarah Nabi","orcid":"https://orcid.org/0009-0005-2932-2470"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4389425508","display_name":"Sciences et Technologies de la Musique et du Son","ror":"https://ror.org/025xvn046","country_code":null,"type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I2802818602","https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I4210102700","https://openalex.org/I4389425508"]},{"id":"https://openalex.org/I35345632","display_name":"Institut de Recherche et Coordination Acoustique Musique","ror":"https://ror.org/0121jnt59","country_code":"FR","type":"education","lineage":["https://openalex.org/I35345632"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Sarah Nabi","raw_affiliation_strings":["IRCAM - Sorbonne Universit&#x00E9;, CNRS UMR 9912,Paris,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IRCAM - Sorbonne Universit&#x00E9;, CNRS UMR 9912,Paris,France","institution_ids":["https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I4389425508","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041371281","display_name":"David Genova","orcid":null},"institutions":[{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]},{"id":"https://openalex.org/I4389425508","display_name":"Sciences et Technologies de la Musique et du Son","ror":"https://ror.org/025xvn046","country_code":null,"type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I2802818602","https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I4210102700","https://openalex.org/I4389425508"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I35345632","display_name":"Institut de Recherche et Coordination Acoustique Musique","ror":"https://ror.org/0121jnt59","country_code":"FR","type":"education","lineage":["https://openalex.org/I35345632"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"David Genova","raw_affiliation_strings":["IRCAM - Sorbonne Universit&#x00E9;, CNRS UMR 9912,Paris,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IRCAM - Sorbonne Universit&#x00E9;, CNRS UMR 9912,Paris,France","institution_ids":["https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I4389425508","https://openalex.org/I1294671590"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085170922","display_name":"Philippe Esling","orcid":"https://orcid.org/0000-0002-1655-7909"},"institutions":[{"id":"https://openalex.org/I4389425508","display_name":"Sciences et Technologies de la Musique et du Son","ror":"https://ror.org/025xvn046","country_code":null,"type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I2802818602","https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I4210102700","https://openalex.org/I4389425508"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]},{"id":"https://openalex.org/I35345632","display_name":"Institut de Recherche et Coordination Acoustique Musique","ror":"https://ror.org/0121jnt59","country_code":"FR","type":"education","lineage":["https://openalex.org/I35345632"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Philippe Esling","raw_affiliation_strings":["IRCAM - Sorbonne Universit&#x00E9;, CNRS UMR 9912,Paris,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IRCAM - Sorbonne Universit&#x00E9;, CNRS UMR 9912,Paris,France","institution_ids":["https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I4389425508","https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5006813412"],"corresponding_institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I4389425508"],"apc_list":null,"apc_paid":null,"fwci":2.0872,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.87674289,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8080359697341919},{"id":"https://openalex.org/keywords/timbre","display_name":"Timbre","score":0.7423998117446899},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5472760200500488},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5220111012458801},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5177687406539917},{"id":"https://openalex.org/keywords/controllability","display_name":"Controllability","score":0.5071697235107422},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5065187215805054},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.47892436385154724},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4730895161628723},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.4458320438861847},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4241305887699127},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.41473448276519775},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.33822569251060486},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.1844933032989502}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8080359697341919},{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.7423998117446899},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5472760200500488},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5220111012458801},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5177687406539917},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.5071697235107422},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5065187215805054},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.47892436385154724},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4730895161628723},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.4458320438861847},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4241305887699127},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.41473448276519775},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33822569251060486},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.1844933032989502},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096670","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096670","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-04467264v1","is_oa":false,"landing_page_url":"https://hal.science/hal-04467264","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Jun 2023, Rhodes Island, France. pp.1-5, &#x27E8;10.1109/ICASSP49357.2023.10096670&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1959608418","https://openalex.org/W2008066450","https://openalex.org/W2188365844","https://openalex.org/W2584032004","https://openalex.org/W2606176153","https://openalex.org/W2621350877","https://openalex.org/W2760103357","https://openalex.org/W2797583228","https://openalex.org/W2962968458","https://openalex.org/W2963626105","https://openalex.org/W2970006822","https://openalex.org/W2971753973","https://openalex.org/W3000389243","https://openalex.org/W3046330735","https://openalex.org/W3097934054","https://openalex.org/W3124126546","https://openalex.org/W3144035034","https://openalex.org/W4309117899","https://openalex.org/W4320013936","https://openalex.org/W6631190155","https://openalex.org/W6640963894","https://openalex.org/W6687045409","https://openalex.org/W6732429163","https://openalex.org/W6736723571","https://openalex.org/W6739139180","https://openalex.org/W6750665317","https://openalex.org/W6767111847","https://openalex.org/W6767453231","https://openalex.org/W6771763809","https://openalex.org/W6779841522","https://openalex.org/W6781288810","https://openalex.org/W6789449647","https://openalex.org/W6803922201"],"related_works":["https://openalex.org/W2036697162","https://openalex.org/W2332386680","https://openalex.org/W2561315646","https://openalex.org/W2248621902","https://openalex.org/W2003779889","https://openalex.org/W4205698120","https://openalex.org/W4239246781","https://openalex.org/W2542825942","https://openalex.org/W2963658876","https://openalex.org/W2885916054"],"abstract_inverted_index":{"Despite":[0],"significant":[1],"advances":[2],"in":[3,43,73,90],"deep":[4,40,56],"models":[5,25,42],"for":[6,109],"music":[7],"generation,":[8,33,114],"the":[9,32,37,79,91,113,122],"use":[10],"of":[11,39,81,112,124,131,149],"these":[12],"techniques":[13],"remains":[14],"restricted":[15],"to":[16,70,116],"expert":[17],"users.":[18],"Before":[19],"being":[20],"democratized":[21],"among":[22],"musicians,":[23],"generative":[24,41,57],"must":[26],"first":[27],"provide":[28],"expressive":[29,61],"control":[30,111],"over":[31],"as":[34,104],"this":[35,47,51],"conditions":[36],"integration":[38],"creative":[44],"workflows.":[45],"In":[46],"paper,":[48],"we":[49],"tackle":[50],"issue":[52],"by":[53,84],"introducing":[54],"a":[55,74,117,128],"audio":[58],"model":[59],"providing":[60,140],"and":[62,136,143],"continuous":[63,110],"descriptor-based":[64],"control,":[65],"while":[66,139],"remaining":[67],"lightweight":[68],"enough":[69],"be":[71],"embedded":[72],"hardware":[75],"synthesizer.":[76],"We":[77,120],"enforce":[78],"controllability":[80],"real-time":[82],"generation":[83],"explicitly":[85],"removing":[86],"salient":[87],"musical":[88],"features":[89,100],"latent":[92],"space":[93],"using":[94],"an":[95],"adversarial":[96],"confusion":[97],"criterion.":[98],"User-specified":[99],"are":[101],"then":[102],"reintroduced":[103],"additional":[105],"conditioning":[106],"information,":[107],"allowing":[108,146],"akin":[115],"synthesizer":[118],"knob.":[119],"assess":[121],"performance":[123],"our":[125],"method":[126],"on":[127],"wide":[129],"variety":[130],"sounds":[132],"including":[133],"instrumental,":[134],"percussive":[135],"speech":[137],"recordings":[138],"both":[141],"timbre":[142],"attributes":[144],"transfer,":[145],"new":[147],"ways":[148],"generating":[150],"sounds.":[151]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-07T13:39:58.223016","created_date":"2025-10-10T00:00:00"}
