{"id":"https://openalex.org/W4406093121","doi":"https://doi.org/10.1109/icassp49660.2025.10890052","title":"MusicGen-Stem: Multi-stem music generation and edition through autoregressive modeling","display_name":"MusicGen-Stem: Multi-stem music generation and edition through autoregressive modeling","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4406093121","doi":"https://doi.org/10.1109/icassp49660.2025.10890052"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10890052","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890052","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2501.01757","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084286684","display_name":"Simon Rouard","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I35345632","display_name":"Institut de Recherche et Coordination Acoustique Musique","ror":"https://ror.org/0121jnt59","country_code":"FR","type":"education","lineage":["https://openalex.org/I35345632"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Simon Rouard","raw_affiliation_strings":["Meta &amp; UMR STMS IRCAM-CNRS-Sorbonne Univ"],"affiliations":[{"raw_affiliation_string":"Meta &amp; UMR STMS IRCAM-CNRS-Sorbonne Univ","institution_ids":["https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108994797","display_name":"Robin San Roman","orcid":null},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en sciences et technologies du num\u00e9rique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]},{"id":"https://openalex.org/I4210121838","display_name":"Laboratoire Lorrain de Recherche en Informatique et ses Applications","ror":"https://ror.org/02vnf0c38","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I277688954","https://openalex.org/I4210107720","https://openalex.org/I4210121838","https://openalex.org/I4210159245","https://openalex.org/I90183372"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Robin San Roman","raw_affiliation_strings":["Meta, FAIR, Univ. de Lorraine, CNRS, Inria, Loria"],"affiliations":[{"raw_affiliation_string":"Meta, FAIR, Univ. de Lorraine, CNRS, Inria, Loria","institution_ids":["https://openalex.org/I1326498283","https://openalex.org/I4210121838"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005191803","display_name":"Yossi Adi","orcid":"https://orcid.org/0000-0003-2237-3898"},"institutions":[{"id":"https://openalex.org/I197251160","display_name":"Hebrew University of Jerusalem","ror":"https://ror.org/03qxff017","country_code":"IL","type":"education","lineage":["https://openalex.org/I197251160"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Yossi Adi","raw_affiliation_strings":["Meta &amp; Hebrew Univ. of Jerusalem"],"affiliations":[{"raw_affiliation_string":"Meta &amp; Hebrew Univ. of Jerusalem","institution_ids":["https://openalex.org/I197251160"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065828059","display_name":"Axel R\u00f6ebel","orcid":"https://orcid.org/0000-0001-6136-4391"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I35345632","display_name":"Institut de Recherche et Coordination Acoustique Musique","ror":"https://ror.org/0121jnt59","country_code":"FR","type":"education","lineage":["https://openalex.org/I35345632"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Axel Roebel","raw_affiliation_strings":["UMR STMS, IRCAM-CNRS Sorbonne Univ"],"affiliations":[{"raw_affiliation_string":"UMR STMS, IRCAM-CNRS Sorbonne Univ","institution_ids":["https://openalex.org/I35345632","https://openalex.org/I39804081","https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5084286684"],"corresponding_institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I35345632","https://openalex.org/I39804081"],"apc_list":null,"apc_paid":null,"fwci":1.2783,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.73740786,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9853000044822693,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bass","display_name":"Bass (fish)","score":0.8018608093261719},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7215196490287781},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.6716666221618652},{"id":"https://openalex.org/keywords/musical-composition","display_name":"Musical composition","score":0.5300001502037048},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5046185255050659},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4679863452911377},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4417021870613098},{"id":"https://openalex.org/keywords/musical","display_name":"Musical","score":0.38754186034202576},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37526077032089233},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.19876950979232788},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09215164184570312},{"id":"https://openalex.org/keywords/visual-arts","display_name":"Visual arts","score":0.0907863974571228}],"concepts":[{"id":"https://openalex.org/C2777182073","wikidata":"https://www.wikidata.org/wiki/Q1224135","display_name":"Bass (fish)","level":2,"score":0.8018608093261719},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7215196490287781},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.6716666221618652},{"id":"https://openalex.org/C109568592","wikidata":"https://www.wikidata.org/wiki/Q207628","display_name":"Musical composition","level":3,"score":0.5300001502037048},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5046185255050659},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4679863452911377},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4417021870613098},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.38754186034202576},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37526077032089233},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.19876950979232788},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09215164184570312},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0907863974571228},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icassp49660.2025.10890052","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890052","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2501.01757","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.01757","pdf_url":"https://arxiv.org/pdf/2501.01757","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:HAL:hal-04928296v1","is_oa":true,"landing_page_url":"https://hal.science/hal-04928296","pdf_url":"https://hal.science/hal-04928296v1/document","source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ICASSP 2025, ICASSP, Apr 2025, Hyderrabad, India","raw_type":"Conference papers"},{"id":"doi:10.48550/arxiv.2501.01757","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2501.01757","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2501.01757","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.01757","pdf_url":"https://arxiv.org/pdf/2501.01757","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.47999998927116394}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4406093121.pdf","grobid_xml":"https://content.openalex.org/works/W4406093121.grobid-xml"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W2405656250","https://openalex.org/W2972478942","https://openalex.org/W2998490864","https://openalex.org/W3215615641","https://openalex.org/W4307323391","https://openalex.org/W4312933868","https://openalex.org/W4372260250","https://openalex.org/W4372266552","https://openalex.org/W4392903114","https://openalex.org/W4392904237","https://openalex.org/W4393138539","https://openalex.org/W4396877837","https://openalex.org/W4398226295","https://openalex.org/W4401023668","https://openalex.org/W4409365005","https://openalex.org/W6714030504","https://openalex.org/W6838639034","https://openalex.org/W6848578254","https://openalex.org/W6848854281","https://openalex.org/W6849105126","https://openalex.org/W6853096648","https://openalex.org/W6853515095","https://openalex.org/W6854494257","https://openalex.org/W6856125517","https://openalex.org/W6860268860","https://openalex.org/W6860273036","https://openalex.org/W6861353174","https://openalex.org/W6861586093","https://openalex.org/W6868771744","https://openalex.org/W6869097082","https://openalex.org/W6869165705","https://openalex.org/W6869810883","https://openalex.org/W6872009179"],"related_works":["https://openalex.org/W587891163","https://openalex.org/W1991830762","https://openalex.org/W2782013024","https://openalex.org/W2011560150","https://openalex.org/W2076114088","https://openalex.org/W2494322668","https://openalex.org/W2134105472","https://openalex.org/W611295337","https://openalex.org/W4385805634","https://openalex.org/W2006928199"],"abstract_inverted_index":{"While":[0],"most":[1],"music":[2,50,65,121,138],"generation":[3,122,139,146],"models":[4],"generate":[5],"a":[6,18,70,76,82],"mixture":[7],"of":[8,54,64,113,130],"stems":[9,24,96],"(in":[10],"mono":[11],"or":[12,94,99],"stereo),":[13],"we":[14,39,57],"propose":[15],"to":[16,47,68,81,90,127],"train":[17,40,69],"multi-stem":[19,136],"generative":[20],"model":[21,74,87,140,153],"with":[22],"3":[23],"(bass,":[25],"drums":[26,93],"and":[27,124,147,152,158],"other)":[28],"that":[29,141],"learn":[30],"the":[31,49,62,128,133],"musical":[32],"dependencies":[33],"between":[34],"them.":[35],"To":[36],"do":[37],"so,":[38],"one":[41],"specialized":[42],"compression":[43],"algorithm":[44],"per":[45],"stem":[46],"tokenize":[48],"into":[51],"parallel":[52],"streams":[53],"tokens.":[55],"Then,":[56],"leverage":[58],"recent":[59],"improvements":[60],"in":[61,120],"task":[63],"source":[66,149],"separation":[67],"multi-stream":[71],"text-to-music":[72],"language":[73],"on":[75,97,111,162],"large":[77],"dataset.":[78],"Finally,":[79],"thanks":[80],"particular":[83],"conditioning":[84],"method,":[85],"our":[86,131],"is":[88,126],"able":[89],"edit":[91],"bass,":[92],"other":[95],"existing":[98,114],"generated":[100],"songs":[101],"as":[102,104],"well":[103],"doing":[105],"iterative":[106],"composition":[107],"(e.g.":[108],"generating":[109],"bass":[110],"top":[112],"drums).":[115],"This":[116],"gives":[117],"more":[118],"flexibility":[119],"algorithms":[123],"it":[125],"best":[129],"knowledge":[132],"first":[134],"open-source":[135],"autoregressive":[137],"can":[142],"perform":[143],"good":[144],"quality":[145],"coherent":[148],"editing.":[150],"Code":[151],"weights":[154],"will":[155],"be":[156],"released":[157],"samples":[159],"are":[160],"available":[161],"simonrouard.github.io/musicgenstem.":[163]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-01-07T00:00:00"}
