{"id":"https://openalex.org/W4399418461","doi":"https://doi.org/10.1145/3652583.3657586","title":"Mapping the Audio Landscape for Innovative Music Sample Generation","display_name":"Mapping the Audio Landscape for Innovative Music Sample Generation","publication_year":2024,"publication_date":"2024-05-30","ids":{"openalex":"https://openalex.org/W4399418461","doi":"https://doi.org/10.1145/3652583.3657586"},"language":"en","primary_location":{"id":"doi:10.1145/3652583.3657586","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3657586","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3657586","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3657586","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016207187","display_name":"Christian Limberg","orcid":"https://orcid.org/0000-0002-4903-3933"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Christian Limberg","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0002-4903-3933","affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5115076710","display_name":"Zhe Zhang","orcid":"https://orcid.org/0000-0002-7337-0446"},"institutions":[{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Zhe Zhang","raw_affiliation_strings":["National Institute of Informatics, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0003-1003-6093","affiliations":[{"raw_affiliation_string":"National Institute of Informatics, Tokyo, Japan","institution_ids":["https://openalex.org/I184597095"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5016207187"],"corresponding_institution_ids":["https://openalex.org/I184597095"],"apc_list":null,"apc_paid":null,"fwci":0.3288,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.50066267,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1207","last_page":"1213"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8152904510498047},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7323113083839417},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.6467519998550415},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.6014989018440247},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5939540863037109},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.4543830156326294},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4342288672924042},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4292130172252655},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42870935797691345},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.38707271218299866},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.3426286578178406}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8152904510498047},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7323113083839417},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.6467519998550415},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.6014989018440247},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5939540863037109},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.4543830156326294},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4342288672924042},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4292130172252655},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42870935797691345},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.38707271218299866},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3426286578178406},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3652583.3657586","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3657586","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3657586","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:pub.uni-bielefeld.de:2990342","is_oa":false,"landing_page_url":"https://pub.uni-bielefeld.de/record/2990342","pdf_url":null,"source":{"id":"https://openalex.org/S4306401670","display_name":"PUB \u2013 Publications at Bielefeld University (Bielefeld University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I20121455","host_organization_name":"Bielefeld University","host_organization_lineage":["https://openalex.org/I20121455"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"http://purl.org/coar/resource_type/c_5794"}],"best_oa_location":{"id":"doi:10.1145/3652583.3657586","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3657586","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3657586","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320875","display_name":"Deutscher Akademischer Austauschdienst","ror":"https://ror.org/039djdh30"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399418461.pdf","grobid_xml":"https://content.openalex.org/works/W4399418461.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W2519091744","https://openalex.org/W2775487773","https://openalex.org/W2913668833","https://openalex.org/W2948211236","https://openalex.org/W3015287975","https://openalex.org/W3093209529","https://openalex.org/W3099378280","https://openalex.org/W3099425575","https://openalex.org/W3123097577","https://openalex.org/W3136272958","https://openalex.org/W3160235471","https://openalex.org/W4205137627","https://openalex.org/W4287271111","https://openalex.org/W4297632254","https://openalex.org/W4297677272","https://openalex.org/W4300980117","https://openalex.org/W4301206121","https://openalex.org/W4303519914","https://openalex.org/W4306177919","https://openalex.org/W4309117899","https://openalex.org/W4311415873","https://openalex.org/W4318351475","https://openalex.org/W4367359628","https://openalex.org/W4372260516","https://openalex.org/W4372263311","https://openalex.org/W4372341951","https://openalex.org/W4380136719","https://openalex.org/W4383646190","https://openalex.org/W4390784372"],"related_works":["https://openalex.org/W4365211920","https://openalex.org/W3014948380","https://openalex.org/W4394785709","https://openalex.org/W4309969736","https://openalex.org/W4380551139","https://openalex.org/W4317695495","https://openalex.org/W2770818364","https://openalex.org/W2953501176","https://openalex.org/W2965095304","https://openalex.org/W2470043383"],"abstract_inverted_index":{"This":[0],"paper":[1],"introduces":[2],"the":[3,48,81,101,105,108,121],"Generative":[4],"Sample":[5],"Map":[6],"(GESAM),":[7],"a":[8,59,67,77,116],"novel":[9],"two-stage":[10],"unsupervised":[11],"learning":[12],"framework":[13],"capable":[14],"of":[15,50,107],"generating":[16],"high-quality":[17],"and":[18,90],"expressive":[19],"audio":[20,41,64,109],"samples":[21,42],"for":[22,75],"music":[23],"production.Recent":[24],"generative":[25],"approaches":[26],"based":[27],"on":[28,32,100,134],"language":[29],"models":[30],"rely":[31],"text":[33],"prompts":[34],"as":[35],"conditions.However,":[36],"fine":[37],"nuances":[38],"in":[39,47],"musical":[40],"can":[43],"hardly":[44],"be":[45],"described":[46],"modality":[49],"text.For":[51],"addressing":[52],"this":[53,124],"shortcoming,":[54],"we":[55,114],"propose":[56],"to":[57,85],"learn":[58],"highly":[60],"descriptive":[61],"latent":[62],"2D":[63],"map":[65,102],"by":[66],"Variational":[68],"Autoencoder":[69],"(VAE)":[70],"which":[71,131],"is":[72,132],"then":[73],"utilized":[74],"conditioning":[76],"Transformer":[78,82],"model.We":[79,122],"demonstrate":[80],"model's":[83],"ability":[84],"achieve":[86],"high":[87],"generation":[88],"quality":[89],"compare":[91],"its":[92],"performance":[93],"against":[94],"two":[95],"baseline":[96],"models.By":[97],"selecting":[98],"points":[99],"that":[103],"compresses":[104],"manifold":[106],"training":[110],"set":[111],"into":[112],"2D,":[113],"enable":[115],"more":[117],"natural":[118],"interaction":[119],"with":[120],"showcase":[123],"capability":[125],"through":[126],"an":[127],"interactive":[128],"demo":[129],"interface,":[130],"accessible":[133],"our":[135],"website":[136],"https://limchr.github.io/gesam/.":[137]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
