{"id":"https://openalex.org/W3135392873","doi":"https://doi.org/10.23919/eusipco54536.2021.9616127","title":"HTMD-Net: A Hybrid Masking-Denoising Approach to Time-Domain Monaural Singing Voice Separation","display_name":"HTMD-Net: A Hybrid Masking-Denoising Approach to Time-Domain Monaural Singing Voice Separation","publication_year":2021,"publication_date":"2021-08-23","ids":{"openalex":"https://openalex.org/W3135392873","doi":"https://doi.org/10.23919/eusipco54536.2021.9616127","mag":"3135392873"},"language":"en","primary_location":{"id":"doi:10.23919/eusipco54536.2021.9616127","is_oa":false,"landing_page_url":"https://doi.org/10.23919/eusipco54536.2021.9616127","pdf_url":null,"source":{"id":"https://openalex.org/S4363607854","display_name":"2021 29th European Signal Processing Conference (EUSIPCO)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 29th European Signal Processing Conference (EUSIPCO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072908898","display_name":"Christos Garoufis","orcid":"https://orcid.org/0000-0002-1714-3943"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]},{"id":"https://openalex.org/I4210156054","display_name":"Athena Research and Innovation Center In Information Communication & Knowledge Technologies","ror":"https://ror.org/0576by029","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210156054"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Christos Garoufis","raw_affiliation_strings":["Robot Perception and Interaction Unit, Athena Research Center, Maroussi, Greece","School of ECE, National Technical University of Athens, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"Robot Perception and Interaction Unit, Athena Research Center, Maroussi, Greece","institution_ids":["https://openalex.org/I4210156054"]},{"raw_affiliation_string":"School of ECE, National Technical University of Athens, Athens, Greece","institution_ids":["https://openalex.org/I174458059"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000495716","display_name":"Athanasia Zlatintsi","orcid":"https://orcid.org/0000-0002-1922-9310"},"institutions":[{"id":"https://openalex.org/I4210156054","display_name":"Athena Research and Innovation Center In Information Communication & Knowledge Technologies","ror":"https://ror.org/0576by029","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210156054"]},{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Athanasia Zlatintsi","raw_affiliation_strings":["Robot Perception and Interaction Unit, Athena Research Center, Maroussi, Greece","School of ECE, National Technical University of Athens, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"Robot Perception and Interaction Unit, Athena Research Center, Maroussi, Greece","institution_ids":["https://openalex.org/I4210156054"]},{"raw_affiliation_string":"School of ECE, National Technical University of Athens, Athens, Greece","institution_ids":["https://openalex.org/I174458059"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079243420","display_name":"Petros Maragos","orcid":"https://orcid.org/0000-0003-0534-2707"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]},{"id":"https://openalex.org/I4210156054","display_name":"Athena Research and Innovation Center In Information Communication & Knowledge Technologies","ror":"https://ror.org/0576by029","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210156054"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Petros Maragos","raw_affiliation_strings":["Robot Perception and Interaction Unit, Athena Research Center, Maroussi, Greece","School of ECE, National Technical University of Athens, Athens, Greece"],"affiliations":[{"raw_affiliation_string":"Robot Perception and Interaction Unit, Athena Research Center, Maroussi, Greece","institution_ids":["https://openalex.org/I4210156054"]},{"raw_affiliation_string":"School of ECE, National Technical University of Athens, Athens, Greece","institution_ids":["https://openalex.org/I174458059"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5072908898"],"corresponding_institution_ids":["https://openalex.org/I174458059","https://openalex.org/I4210156054"],"apc_list":null,"apc_paid":null,"fwci":0.172,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.31027535,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"341","last_page":"345"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monaural","display_name":"Monaural","score":0.9500547647476196},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8254619836807251},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5775044560432434},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5026595592498779},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.4747771918773651},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.4608793556690216},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4222612679004669},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4035540223121643}],"concepts":[{"id":"https://openalex.org/C102894143","wikidata":"https://www.wikidata.org/wiki/Q1323979","display_name":"Monaural","level":2,"score":0.9500547647476196},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8254619836807251},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5775044560432434},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5026595592498779},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.4747771918773651},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.4608793556690216},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4222612679004669},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4035540223121643},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/eusipco54536.2021.9616127","is_oa":false,"landing_page_url":"https://doi.org/10.23919/eusipco54536.2021.9616127","pdf_url":null,"source":{"id":"https://openalex.org/S4363607854","display_name":"2021 29th European Signal Processing Conference (EUSIPCO)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 29th European Signal Processing Conference (EUSIPCO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2018735295","https://openalex.org/W2127851351","https://openalex.org/W2150415460","https://openalex.org/W2164098335","https://openalex.org/W2774707525","https://openalex.org/W2796571515","https://openalex.org/W2903502793","https://openalex.org/W2916985722","https://openalex.org/W2952218014","https://openalex.org/W2954304925","https://openalex.org/W2963191020","https://openalex.org/W2963341071","https://openalex.org/W2963452667","https://openalex.org/W2963519193","https://openalex.org/W2963698842","https://openalex.org/W2963751183","https://openalex.org/W2963992487","https://openalex.org/W2964070952","https://openalex.org/W2964121744","https://openalex.org/W2972411915","https://openalex.org/W2990594533","https://openalex.org/W2991107219","https://openalex.org/W2997987128","https://openalex.org/W2998678989","https://openalex.org/W3012787261","https://openalex.org/W3015199127","https://openalex.org/W3015201698","https://openalex.org/W3015721817","https://openalex.org/W3099330747","https://openalex.org/W3165246035","https://openalex.org/W4298310324","https://openalex.org/W6631190155","https://openalex.org/W6746914816","https://openalex.org/W6751356808","https://openalex.org/W6751512325","https://openalex.org/W6775116504","https://openalex.org/W6891901120"],"related_works":["https://openalex.org/W2036157531","https://openalex.org/W2056406069","https://openalex.org/W1974981856","https://openalex.org/W1518859147","https://openalex.org/W2045506488","https://openalex.org/W3214716754","https://openalex.org/W2401567014","https://openalex.org/W1976239252","https://openalex.org/W1614994442","https://openalex.org/W2035580387"],"abstract_inverted_index":{"The":[0],"advent":[1],"of":[2,10,38,50,68,115,121],"deep":[3,11],"learning":[4],"has":[5],"led":[6],"to":[7,42,54,65,70,105,139],"the":[8,27,39,55,59,66,71,88,107,111,119,127,148,153],"prevalence":[9],"neural":[12],"network":[13],"architectures":[14],"for":[15],"monaural":[16,122],"music":[17],"source":[18,72,108],"separation,":[19],"with":[20],"end-to-end":[21],"approaches":[22],"that":[23,131],"operate":[24],"directly":[25],"on":[26,100,143],"waveform":[28],"level":[29],"increasingly":[30],"receiving":[31],"research":[32],"attention.":[33],"Among":[34],"these":[35],"approaches,":[36],"transformation":[37],"input":[40],"mixture":[41],"a":[43,51,83,91,96],"learned":[44],"latent":[45,56],"space,":[46],"and":[47,95],"multiplicative":[48],"application":[49],"soft":[52],"mask":[53],"mixture,":[57],"achieves":[58,135],"best":[60],"performance,":[61],"but":[62],"is":[63],"prone":[64],"introduction":[67],"artifacts":[69],"estimate.":[73],"To":[74],"alleviate":[75],"this":[76,79],"problem,":[77],"in":[78,103,118,126],"paper":[80],"we":[81],"propose":[82],"hybrid":[84],"time-domain":[85],"approach,":[86],"termed":[87],"HTMD-Net,":[89],"combining":[90],"lightweight":[92],"masking":[93,112,144],"component":[94],"denoising":[97],"module,":[98],"based":[99,141],"skip":[101],"connections,":[102],"order":[104],"refine":[106],"estimated":[109],"by":[110],"procedure.":[113],"Evaluation":[114],"our":[116,132],"approach":[117],"task":[120],"singing":[123],"voice":[124],"separation":[125],"musdb18":[128],"dataset":[129],"indicates":[130],"proposed":[133],"method":[134],"competitive":[136],"performance":[137],"compared":[138],"methods":[140],"purely":[142],"when":[145],"trained":[146],"under":[147],"same":[149],"conditions,":[150],"especially":[151],"regarding":[152],"behavior":[154],"during":[155],"silent":[156],"segments,":[157],"while":[158],"achieving":[159],"higher":[160],"computational":[161],"efficiency.":[162]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
