{"id":"https://openalex.org/W4392904432","doi":"https://doi.org/10.1109/icassp48485.2024.10446951","title":"DJCM: A Deep Joint Cascade Model for Singing Voice Separation and Vocal Pitch Estimation","display_name":"DJCM: A Deep Joint Cascade Model for Singing Voice Separation and Vocal Pitch Estimation","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392904432","doi":"https://doi.org/10.1109/icassp48485.2024.10446951"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446951","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446951","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101939672","display_name":"Haojie Wei","orcid":"https://orcid.org/0000-0003-2716-7866"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haojie Wei","raw_affiliation_strings":["Renmin University of China,School of Information,Beijing,China","School of Information, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China,School of Information,Beijing,China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101319196","display_name":"Xueke Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueke Cao","raw_affiliation_strings":["Beijing Jiaotong University,Institute of Information Science,Beijing,China","Institute of Information Science, Beijing Jiaotong University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Jiaotong University,Institute of Information Science,Beijing,China","institution_ids":["https://openalex.org/I21193070"]},{"raw_affiliation_string":"Institute of Information Science, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100720681","display_name":"Wenbo Xu","orcid":"https://orcid.org/0000-0001-8704-1937"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbo Xu","raw_affiliation_strings":["Renmin University of China,School of Information,Beijing,China","School of Information, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China,School of Information,Beijing,China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022310235","display_name":"Tangpeng Dan","orcid":"https://orcid.org/0000-0003-0824-5494"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tangpeng Dan","raw_affiliation_strings":["Renmin University of China,School of Information,Beijing,China","School of Information, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China,School of Information,Beijing,China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057384573","display_name":"Yueguo Chen","orcid":"https://orcid.org/0000-0003-2720-7135"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yueguo Chen","raw_affiliation_strings":["Renmin University of China,School of Information,Beijing,China","School of Information, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China,School of Information,Beijing,China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101939672"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03013163,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"301","last_page":"305"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7868556976318359},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7090486288070679},{"id":"https://openalex.org/keywords/singing","display_name":"Singing","score":0.7070073485374451},{"id":"https://openalex.org/keywords/cascade","display_name":"Cascade","score":0.6960116028785706},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.6702070236206055},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6015272736549377},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6004956364631653},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.5381579995155334},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.5144148468971252},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.48353123664855957},{"id":"https://openalex.org/keywords/pitch-detection-algorithm","display_name":"Pitch detection algorithm","score":0.476146936416626},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4663951098918915},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.41096752882003784},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3270971179008484},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.28359130024909973},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.22074705362319946},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.0998583436012268},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08251118659973145}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7868556976318359},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7090486288070679},{"id":"https://openalex.org/C44819458","wikidata":"https://www.wikidata.org/wiki/Q27939","display_name":"Singing","level":2,"score":0.7070073485374451},{"id":"https://openalex.org/C34146451","wikidata":"https://www.wikidata.org/wiki/Q5048094","display_name":"Cascade","level":2,"score":0.6960116028785706},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.6702070236206055},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6015272736549377},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6004956364631653},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.5381579995155334},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.5144148468971252},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.48353123664855957},{"id":"https://openalex.org/C135622632","wikidata":"https://www.wikidata.org/wiki/Q7198851","display_name":"Pitch detection algorithm","level":3,"score":0.476146936416626},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4663951098918915},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.41096752882003784},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3270971179008484},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28359130024909973},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.22074705362319946},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0998583436012268},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08251118659973145},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C42360764","wikidata":"https://www.wikidata.org/wiki/Q83588","display_name":"Chemical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C194257627","wikidata":"https://www.wikidata.org/wiki/Q211554","display_name":"Amplifier","level":3,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446951","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446951","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Gender equality","score":0.4699999988079071,"id":"https://metadata.un.org/sdg/5"}],"awards":[{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3527113235","display_name":null,"funder_award_id":"62272466","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322499","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1975079546","https://openalex.org/W2017548439","https://openalex.org/W2091425152","https://openalex.org/W2118774185","https://openalex.org/W2124539664","https://openalex.org/W2407685581","https://openalex.org/W2516392987","https://openalex.org/W2774707525","https://openalex.org/W2932319787","https://openalex.org/W2962866891","https://openalex.org/W2963452667","https://openalex.org/W2986673441","https://openalex.org/W2990594533","https://openalex.org/W2997938083","https://openalex.org/W3015832727","https://openalex.org/W3037149862","https://openalex.org/W3094550259","https://openalex.org/W3161758688","https://openalex.org/W3192255518","https://openalex.org/W3213726885","https://openalex.org/W4225401194","https://openalex.org/W4286980120","https://openalex.org/W4289242435","https://openalex.org/W4372260250","https://openalex.org/W4385768169","https://openalex.org/W4385822304","https://openalex.org/W6631190155","https://openalex.org/W6678626330","https://openalex.org/W6714030504","https://openalex.org/W6746914816","https://openalex.org/W6751512325","https://openalex.org/W6756251360","https://openalex.org/W6801426729","https://openalex.org/W6804608017","https://openalex.org/W6846880785"],"related_works":["https://openalex.org/W2390529913","https://openalex.org/W2142368101","https://openalex.org/W2153719181","https://openalex.org/W2372249404","https://openalex.org/W2367547137","https://openalex.org/W2354994102","https://openalex.org/W2368461988","https://openalex.org/W2077498359","https://openalex.org/W2100891835","https://openalex.org/W4372260329"],"abstract_inverted_index":{"Singing":[0],"voice":[1,116,171],"separation":[2,117,172],"and":[3,22,33,72,118,173],"vocal":[4,23,119,182],"pitch":[5,120,183],"estimation":[6],"are":[7,138],"pivotal":[8],"tasks":[9],"in":[10,163,175],"music":[11],"information":[12],"retrieval.":[13],"Existing":[14],"methods":[15,32,43,55,82],"for":[16,58,114,169,181],"simultaneous":[17],"extraction":[18],"of":[19,41,87,99,144,161,165,177,192,195],"clean":[20],"vocals":[21],"pitches":[24],"can":[25],"be":[26],"classified":[27],"into":[28],"two":[29],"categories:":[30],"pipeline":[31,54],"naive":[34,79],"joint":[35,80,126],"learning":[36,81],"methods.":[37],"However,":[38],"the":[39,47,51,66,70,76,85,96,190],"efficacy":[40],"these":[42,104],"is":[44],"limited":[45],"by":[46],"following":[48],"problems:":[49],"On":[50,75],"one":[52],"hand,":[53,78],"train":[56,132],"models":[57],"each":[59,100,193],"task":[60],"independently,":[61],"resulting":[62],"a":[63,93,108,124],"mismatch":[64],"between":[65,95],"data":[67],"distributions":[68],"at":[69],"training":[71],"testing":[73],"time.":[74],"other":[77],"simply":[83],"add":[84],"losses":[86],"both":[88,133,145,156],"tasks,":[89,157],"possibly":[90],"leading":[91],"to":[92,130,140],"misalignment":[94],"distinct":[97],"objectives":[98,143],"task.":[101],"To":[102],"solve":[103],"problems,":[105],"we":[106],"propose":[107],"Deep":[109],"Joint":[110],"Cascade":[111],"Model":[112],"(DJCM)":[113],"singing":[115,170],"estimation.":[121,184],"DJCM":[122,151],"employs":[123],"novel":[125],"cascade":[127],"model":[128],"structure":[129],"concurrently":[131],"tasks.":[134,146],"Moreover,":[135],"task-specific":[136],"weights":[137],"used":[139],"align":[141],"different":[142],"Experimental":[147],"results":[148],"show":[149],"that":[150],"achieves":[152],"state-of-the-art":[153],"performance":[154],"on":[155],"with":[158],"great":[159],"improvements":[160],"0.45":[162],"terms":[164,176],"Signal-to-Distortion":[166],"Ratio":[167],"(SDR)":[168],"2.86%":[174],"Overall":[178],"Accuracy":[179],"(OA)":[180],"Furthermore,":[185],"extensive":[186],"ablation":[187],"studies":[188],"validate":[189],"effectiveness":[191],"design":[194],"our":[196],"proposed":[197],"model.":[198]},"counts_by_year":[],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2024-03-19T00:00:00"}
