{"id":"https://openalex.org/W6968644326","doi":"https://doi.org/10.5281/zenodo.4285406","title":"Spectrogram Inpainting for Interactive Generation of Instrument Sounds","display_name":"Spectrogram Inpainting for Interactive Generation of Instrument Sounds","publication_year":2020,"publication_date":"2020-10-19","ids":{"openalex":"https://openalex.org/W6968644326","doi":"https://doi.org/10.5281/zenodo.4285406"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.4285406","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.4285406","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.4285406","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Th\u00e9is Bazin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Th\u00e9is Bazin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ga\u00ebtan Hadjeres","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ga\u00ebtan Hadjeres","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Philippe Esling","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Philippe Esling","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Mikhail Malt","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mikhail Malt","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31350407,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.43149998784065247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.43149998784065247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.2847000062465668,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.03999999910593033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.9404000043869019},{"id":"https://openalex.org/keywords/inpainting","display_name":"Inpainting","score":0.6517000198364258},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.43869999051094055},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4357999861240387},{"id":"https://openalex.org/keywords/resampling","display_name":"Resampling","score":0.42089998722076416},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.41850000619888306},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.3352999985218048}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.9404000043869019},{"id":"https://openalex.org/C11727466","wikidata":"https://www.wikidata.org/wiki/Q1628157","display_name":"Inpainting","level":3,"score":0.6517000198364258},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6205999851226807},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5940999984741211},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5069000124931335},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.43869999051094055},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4357999861240387},{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.42089998722076416},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.41850000619888306},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3352999985218048},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32670000195503235},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.31949999928474426},{"id":"https://openalex.org/C2780233690","wikidata":"https://www.wikidata.org/wiki/Q535347","display_name":"Transparency (behavior)","level":2,"score":0.30959999561309814},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.2678000032901764},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.26489999890327454},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.2615000009536743},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25040000677108765}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.4285406","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.4285406","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.4285406","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.4285406","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Modern":[0],"approaches":[1],"to":[2,11,46,66,70,111,123],"sound":[3],"synthesis":[4],"using":[5],"deep":[6],"neural":[7],"networks":[8],"are":[9],"hard":[10],"control,":[12],"especially":[13],"when":[14],"fine-grained":[15],"conditioning":[16],"information":[17],"is":[18],"not":[19],"available,":[20],"hindering":[21],"their":[22],"adoption":[23],"by":[24,114],"musicians.":[25],"In":[26],"this":[27,51],"paper,":[28],"we":[29,53,59,78,105],"cast":[30],"the":[31,61,84,92,96],"generation":[32,64,86],"of":[33,87],"individual":[34],"instrumental":[35],"notes":[36],"as":[37],"an":[38,107],"inpainting-based":[39,85],"task,":[40],"introducing":[41],"novel":[42],"and":[43,118],"unique":[44],"ways":[45],"iteratively":[47],"shape":[48],"sounds.":[49],"To":[50],"end,":[52],"propose":[54],"a":[55],"two-step":[56],"approach:":[57],"first,":[58],"adapt":[60],"VQ-VAE-2":[62],"image":[63],"architecture":[65,94],"spectrograms":[67,73],"in":[68],"order":[69],"convert":[71],"real-valued":[72],"into":[74],"compact":[75],"discrete":[76],"codemaps,":[77],"then":[79],"implement":[80],"token-masked":[81],"Transformers":[82],"for":[83,116],"these":[88],"codemaps.":[89],"We":[90],"apply":[91],"proposed":[93],"on":[95,99],"NSynth":[97],"dataset":[98],"masked":[100],"resampling":[101],"tasks.":[102],"Most":[103],"crucially,":[104],"open-source":[106],"interactive":[108],"web":[109],"interface":[110],"transform":[112],"sounds":[113],"inpainting,":[115],"artists":[117],"practitioners":[119],"alike,":[120],"opening":[121],"up":[122],"new,":[124],"creative":[125],"uses.":[126]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
