{"id":"https://openalex.org/W6911202291","doi":"https://doi.org/10.5281/zenodo.10265271","title":"Timbre Transfer Using Image-to-Image Denoising Diffusion Implicit Models","display_name":"Timbre Transfer Using Image-to-Image Denoising Diffusion Implicit Models","publication_year":2023,"publication_date":"2023-11-04","ids":{"openalex":"https://openalex.org/W6911202291","doi":"https://doi.org/10.5281/zenodo.10265271"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.10265271","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.10265271","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.10265271","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Luca Comanducci","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luca Comanducci","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Fabio Antonacci","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fabio Antonacci","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Augusto Sarti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Augusto Sarti","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28209731,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.34869998693466187,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.34869998693466187,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.18400000035762787,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.11800000071525574,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/timbre","display_name":"Timbre","score":0.9416999816894531},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.6018000245094299},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.5734999775886536},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5598000288009644},{"id":"https://openalex.org/keywords/transfer-function","display_name":"Transfer function","score":0.4447999894618988},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38179999589920044},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.3528999984264374},{"id":"https://openalex.org/keywords/musical-instrument","display_name":"Musical instrument","score":0.3497999906539917},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.33469998836517334}],"concepts":[{"id":"https://openalex.org/C2776539107","wikidata":"https://www.wikidata.org/wiki/Q176501","display_name":"Timbre","level":3,"score":0.9416999816894531},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6905999779701233},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.6018000245094299},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5925999879837036},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.5734999775886536},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5598000288009644},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47209998965263367},{"id":"https://openalex.org/C81299745","wikidata":"https://www.wikidata.org/wiki/Q334269","display_name":"Transfer function","level":2,"score":0.4447999894618988},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38179999589920044},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.3528999984264374},{"id":"https://openalex.org/C2983311337","wikidata":"https://www.wikidata.org/wiki/Q34379","display_name":"Musical instrument","level":2,"score":0.3497999906539917},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.33469998836517334},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.32589998841285706},{"id":"https://openalex.org/C2781100714","wikidata":"https://www.wikidata.org/wiki/Q377435","display_name":"Vibrato","level":3,"score":0.3131999969482422},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.30309998989105225},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2897999882698059},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.27799999713897705},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C128422554","wikidata":"https://www.wikidata.org/wiki/Q20077126","display_name":"Sound recording and reproduction","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C177454536","wikidata":"https://www.wikidata.org/wiki/Q578290","display_name":"Emphasis (telecommunications)","level":2,"score":0.2606000006198883},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.2590999901294708},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.257099986076355},{"id":"https://openalex.org/C86011254","wikidata":"https://www.wikidata.org/wiki/Q2717062","display_name":"Musical tone","level":4,"score":0.25679999589920044},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.2558000087738037},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.25519999861717224},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.10265271","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.10265271","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.10265271","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.10265271","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.4341197609901428}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Timbre":[0],"transfer":[1,97],"techniques":[2],"aim":[3],"at":[4],"converting":[5,102,134],"the":[6,17,34,67,79,84,95,103,113,116,121,147,165,168],"sound":[7],"of":[8,38,87,115,167],"a":[9],"musical":[10,39],"piece":[11],"generated":[12],"by":[13,25,83,100,111,133],"one":[14,19],"instrument":[15],"into":[16,106],"same":[18],"as":[20,30,32,42],"if":[21],"it":[22],"was":[23],"played":[24],"another":[26],"instrument,":[27],"while":[28],"maintaining":[29],"much":[31],"possible":[33],"content":[35],"in":[36,50,161],"terms":[37],"characteristics":[40],"such":[41],"melody":[43],"and":[44,110,129,141,158],"dynamics.":[45],"Following":[46],"their":[47],"recent":[48,85],"breakthroughs":[49],"deep":[51],"learning-based":[52],"generation,":[53],"we":[54,65,93],"apply":[55,66],"Denoising":[56,70],"Diffusion":[57,71],"Models":[58,73],"(DDMs)":[59],"to":[60,77,89,163],"perform":[61,126],"timbre":[62,96,118,123,131],"transfer.":[63],"Specifically,":[64],"recently":[68],"proposed":[69,148,169],"Implicit":[72],"(DDIMs)":[74],"that":[75],"enable":[76],"accelerate":[78],"sampling":[80],"procedure.":[81],"Inspired":[82],"application":[86],"DDMs":[88],"image":[90],"translation":[91],"problems":[92],"formulate":[94],"task":[98],"similarly,":[99],"first":[101],"audio":[104,135],"tracks":[105],"log":[107],"mel":[108],"spectrograms":[109],"conditioning":[112],"generation":[114],"desired":[117],"spectrogram":[119],"through":[120,155],"input":[122],"spectrogram.":[124],"We":[125,145],"both":[127,154],"one-to-one":[128],"many-to-many":[130],"transfer,":[132],"waveforms":[136],"containing":[137],"only":[138],"single":[139],"instruments":[140],"multiple":[142],"instruments,":[143],"respectively.":[144],"compare":[146],"technique":[149],"with":[150],"existing":[151],"state-of-the-art":[152],"methods":[153],"listening":[156],"tests":[157],"objective":[159],"measures":[160],"order":[162],"demonstrate":[164],"effectiveness":[166],"model.":[170]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
