{"id":"https://openalex.org/W4403792235","doi":"https://doi.org/10.1145/3664647.3680985","title":"MAJL: A Model-Agnostic Joint Learning Framework for Music Source Separation and Pitch Estimation","display_name":"MAJL: A Model-Agnostic Joint Learning Framework for Music Source Separation and Pitch Estimation","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403792235","doi":"https://doi.org/10.1145/3664647.3680985"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3680985","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680985","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101939672","display_name":"Haojie Wei","orcid":"https://orcid.org/0000-0003-2716-7866"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haojie Wei","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100870091","display_name":"Jun Yuan","orcid":"https://orcid.org/0000-0001-6003-9714"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yuan","raw_affiliation_strings":["Huawei Noah's Ark Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100422092","display_name":"Rui Zhang","orcid":"https://orcid.org/0000-0002-8132-6250"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Zhang","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology (www.ruizhang.info), Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology (www.ruizhang.info), Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048783161","display_name":"Quanyu Dai","orcid":"https://orcid.org/0000-0001-7578-2738"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quanyu Dai","raw_affiliation_strings":["Huawei Noah's Ark Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040815903","display_name":"Yueguo Chen","orcid":"https://orcid.org/0000-0002-2239-4472"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yueguo Chen","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101939672"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":0.375,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.58684013,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"8623","last_page":"8632"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.8361101150512695},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7063490748405457},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.7027797102928162},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.5487086772918701},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.46065402030944824},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4454525113105774},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3987690210342407},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.27279990911483765},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09399375319480896}],"concepts":[{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.8361101150512695},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7063490748405457},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.7027797102928162},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.5487086772918701},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.46065402030944824},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4454525113105774},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3987690210342407},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27279990911483765},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09399375319480896},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3680985","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680985","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1975079546","https://openalex.org/W2091425152","https://openalex.org/W2108771579","https://openalex.org/W2113217465","https://openalex.org/W2118774185","https://openalex.org/W2130770109","https://openalex.org/W2516392987","https://openalex.org/W2906214917","https://openalex.org/W2913754224","https://openalex.org/W2962866891","https://openalex.org/W2964070952","https://openalex.org/W2972353749","https://openalex.org/W2986673441","https://openalex.org/W2997938083","https://openalex.org/W3011176162","https://openalex.org/W3015832727","https://openalex.org/W3160649916","https://openalex.org/W3161758688","https://openalex.org/W3174257816","https://openalex.org/W4225311783","https://openalex.org/W4225401194","https://openalex.org/W4372260250","https://openalex.org/W4375928773","https://openalex.org/W4385572615","https://openalex.org/W4385768169","https://openalex.org/W4385823161","https://openalex.org/W4389519009","https://openalex.org/W4389520437","https://openalex.org/W4393158690","https://openalex.org/W4393160357","https://openalex.org/W4396723563","https://openalex.org/W4401863256"],"related_works":["https://openalex.org/W2071676784","https://openalex.org/W4292513318","https://openalex.org/W4308092240","https://openalex.org/W2287611352","https://openalex.org/W320684304","https://openalex.org/W1509352139","https://openalex.org/W4385464961","https://openalex.org/W2060903012","https://openalex.org/W2021161555","https://openalex.org/W2077498359"],"abstract_inverted_index":{"Music":[0],"source":[1,27,159],"separation":[2,160],"and":[3,71,95,109,128,161],"pitch":[4,18,169],"estimation":[5,19],"are":[6],"two":[7,37,56],"vital":[8],"tasks":[9,38],"in":[10,153,163,192],"music":[11,26,137,158],"information":[12],"retrieval.":[13],"Typically,":[14],"the":[15,23,44,61,66,123,177,187],"input":[16],"of":[17,25,63,68,125,151,179,182,190],"is":[20,91],"obtained":[21],"from":[22],"output":[24],"separation.":[28],"Therefore,":[29],"existing":[30],"methods":[31,53,144],"have":[32],"tried":[33],"to":[34,42,194],"perform":[35],"these":[36,52,77],"simultaneously,":[39],"so":[40],"as":[41],"leverage":[43],"mutually":[45],"beneficial":[46],"relationship":[47],"between":[48],"both":[49,64,88,146],"tasks.":[50,89],"However,":[51],"still":[54],"face":[55],"critical":[57],"challenges":[58],"that":[59,140],"limit":[60],"improvement":[62],"tasks:":[65],"lack":[67,124],"labeled":[69,126],"data":[70,127],"joint":[72,129],"learning":[73,130],"optimization.":[74],"To":[75],"address":[76],"challenges,":[78],"we":[79],"propose":[80],"a":[81,92,105,110],"Model-Agnostic":[82],"Joint":[83],"Learning":[84],"(MAJL)":[85],"framework":[86,94],"for":[87,100,157,168],"MAJL":[90,141,191],"generic":[93],"can":[96],"use":[97],"variant":[98],"models":[99],"each":[101,180],"task.":[102],"It":[103],"includes":[104],"two-stage":[106],"training":[107],"method":[108,113],"dynamic":[111],"weighting":[112],"named":[114],"Dynamic":[115],"Weights":[116],"on":[117,135,145],"Hard":[118],"Samples":[119],"(DWHS),":[120],"which":[121],"addresses":[122],"optimization,":[131],"respectively.":[132],"Experimental":[133],"results":[134],"public":[136],"datasets":[138],"show":[139],"outperforms":[142],"state-of-the-art":[143],"tasks,":[147],"with":[148],"significant":[149],"improvements":[150],"0.92":[152],"Signal-to-Distortion":[154],"Ratio":[155],"(SDR)":[156],"2.71%":[162],"Raw":[164],"Pitch":[165],"Accuracy":[166],"(RPA)":[167],"estimation.":[170],"Furthermore,":[171],"comprehensive":[172],"studies":[173],"not":[174],"only":[175],"validate":[176],"effectiveness":[178],"component":[181],"MAJL,":[183],"but":[184],"also":[185],"indicate":[186],"great":[188],"generality":[189],"adapting":[193],"different":[195],"model":[196],"architectures.":[197]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
