{"id":"https://openalex.org/W4415537706","doi":"https://doi.org/10.1145/3746027.3755710","title":"MuCodec: Ultra Low-Bitrate Music Codec for Music Generation","display_name":"MuCodec: Ultra Low-Bitrate Music Codec for Music Generation","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415537706","doi":"https://doi.org/10.1145/3746027.3755710"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755710","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3755710","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3746027.3755710","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114423981","display_name":"Yaoxun Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yaoxun Xu","raw_affiliation_strings":["Tsinghua University, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048946763","display_name":"Hangting Chen","orcid":"https://orcid.org/0000-0002-4085-4364"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hangting Chen","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004643540","display_name":"Jianwei Yu","orcid":"https://orcid.org/0000-0002-2449-1436"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianwei Yu","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110323206","display_name":"W. Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Tan","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103014224","display_name":"Shun Lei","orcid":"https://orcid.org/0000-0003-3597-3913"},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shun Lei","raw_affiliation_strings":["Tsinghua University, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114423982","display_name":"Zhiwei Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiwei Lin","raw_affiliation_strings":["Tsinghua University, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038895203","display_name":"Rongzhi Gu","orcid":"https://orcid.org/0000-0003-1861-9170"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongzhi Gu","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102869280","display_name":"Zhiyong Wu","orcid":"https://orcid.org/0000-0001-8533-0524"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyong Wu","raw_affiliation_strings":["Tsinghua University, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5114423981"],"corresponding_institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.4141605,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"689","last_page":"698"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.755299985408783},{"id":"https://openalex.org/keywords/pop-music-automation","display_name":"Pop music automation","score":0.5253000259399414},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.38690000772476196},{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.35670000314712524},{"id":"https://openalex.org/keywords/sound-quality","display_name":"Sound quality","score":0.35100001096725464},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.34850001335144043},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.3327000141143799},{"id":"https://openalex.org/keywords/stereophonic-sound","display_name":"Stereophonic sound","score":0.3109999895095825}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7694000005722046},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.755299985408783},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5723999738693237},{"id":"https://openalex.org/C73520026","wikidata":"https://www.wikidata.org/wiki/Q7229091","display_name":"Pop music automation","level":4,"score":0.5253000259399414},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.38690000772476196},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.35670000314712524},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.35100001096725464},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.34850001335144043},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.3327000141143799},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32359999418258667},{"id":"https://openalex.org/C140631703","wikidata":"https://www.wikidata.org/wiki/Q34678","display_name":"Stereophonic sound","level":3,"score":0.3109999895095825},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.3070000112056732},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.30169999599456787},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.30160000920295715},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2816999852657318},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.27869999408721924},{"id":"https://openalex.org/C542929976","wikidata":"https://www.wikidata.org/wiki/Q9730","display_name":"Classical music","level":3,"score":0.26809999346733093},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C3261483","wikidata":"https://www.wikidata.org/wiki/Q119565","display_name":"Frame rate","level":2,"score":0.2558000087738037},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.25429999828338623},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.2529999911785126},{"id":"https://openalex.org/C114611597","wikidata":"https://www.wikidata.org/wiki/Q373342","display_name":"Popular music","level":2,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755710","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3755710","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3746027.3755710","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3755710","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1728888090","https://openalex.org/W3096831136","https://openalex.org/W3215615641","https://openalex.org/W4353015365","https://openalex.org/W4372260250","https://openalex.org/W4396877837","https://openalex.org/W4402112533","https://openalex.org/W4404740148"],"related_works":[],"abstract_inverted_index":{"Music":[0],"generation":[1],"is":[2,37,86,205],"pivotal":[3],"in":[4,42,72,154,227],"multimedia,":[5],"aiding":[6],"creation":[7],"and":[8,21,103,112,151,166,196,230,233,241],"lowering":[9],"the":[10,52,91,199,206],"creative":[11],"threshold.":[12],"It":[13],"focuses":[14],"on":[15,25],"generating":[16],"music":[17,35,46,69,96,117,128,136,190,214,238],"with":[18,31,51,105,174],"clear":[19],"vocals":[20,102],"harmonious":[22],"accompaniment":[23,104],"based":[24],"lyrics,":[26],"combining":[27],"high":[28,106],"artistic":[29],"creativity":[30],"technical":[32],"challenges.":[33],"The":[34,183],"codec":[36,208],"an":[38,216],"important":[39],"bridging":[40],"component":[41],"large":[43],"language":[44,49],"model-based":[45],"generation,":[47],"connecting":[48],"models":[50],"generated":[53],"music.":[54],"However,":[55],"existing":[56],"neural":[57],"codecs":[58,97],"typically":[59],"require":[60],"token":[61],"rates":[62,111],"exceeding":[63],"50":[64],"Hz":[65],"to":[66,148],"achieve":[67],"acceptable":[68],"quality,":[70],"resulting":[71],"a":[73,81,140,155,192],"context":[74],"length":[75],"that":[76,85,98],"surpasses":[77],"12,000":[78],"tokens":[79],"for":[80,93,126],"4-minute":[82],"song-a":[83],"scale":[84],"computationally":[87],"demanding.":[88],"This":[89],"highlights":[90],"need":[92],"high-compression,":[94],"high-fidelity":[95],"can":[99,234],"reconstruct":[100],"both":[101,228],"quality":[107,176],"at":[108,130,215],"low":[109],"frame":[110],"bitrates,":[113,132],"thereby":[114],"better":[115],"assisting":[116],"generation.":[118,137,239],"To":[119,198],"address":[120],"this,":[121],"we":[122],"introduce":[123],"MuCodec,":[124],"designed":[125],"high-quality":[127],"reconstruction":[129,175],"ultra-low":[131,217],"facilitating":[133],"more":[134,235],"efficient":[135],"MuCodec":[138,204],"employs":[139],"two-stage":[141],"training":[142],"method,":[143],"enabling":[144],"its":[145],"encoder,":[146],"MuEncoder,":[147],"extract":[149],"semantic":[150],"acoustic":[152],"features":[153,159,170,185],"unified":[156],"representation.":[157],"These":[158],"are":[160,186],"discretized":[161],"using":[162,191],"residual":[163],"vector":[164],"quantization":[165],"converted":[167],"into":[168,189],"Mel-VAE":[169,184,194],"through":[171],"flow":[172],"matching,":[173],"improved":[177],"by":[178],"representation":[179],"alignment":[180],"during":[181],"training.":[182],"then":[187],"reconstructed":[188],"pretrained":[193],"decoder":[195],"HiFi-GAN.":[197],"best":[200],"of":[201,210,219],"our":[202],"knowledge,":[203],"first":[207],"capable":[209],"reconstructing":[211],"48kHz":[212],"stereo":[213],"bitrate":[218],"0.35":[220],"kbps":[221],"(25":[222],"Hz),":[223],"achieving":[224],"state-of-the-art":[225],"performance":[226],"subjective":[229],"objective":[231],"evaluations,":[232],"effectively":[236],"support":[237],"Code":[240],"Demo:":[242],"https://mucodec.github.io/Mucodec/.":[243]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-25T00:00:00"}
