{"id":"https://openalex.org/W1533874954","doi":"https://doi.org/10.21437/interspeech.2007-332","title":"Smooth soft mel-spectrographic masks based on blind sparse source separation","display_name":"Smooth soft mel-spectrographic masks based on blind sparse source separation","publication_year":2007,"publication_date":"2007-08-27","ids":{"openalex":"https://openalex.org/W1533874954","doi":"https://doi.org/10.21437/interspeech.2007-332","mag":"1533874954"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2007-332","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2007-332","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2007","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029037494","display_name":"Marco K\u00fchne","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Marco K\u00fchne","raw_affiliation_strings":["School of Electrical, Electronic and Computer Engineering"],"affiliations":[{"raw_affiliation_string":"School of Electrical, Electronic and Computer Engineering","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017213156","display_name":"Roberto Togneri","orcid":"https://orcid.org/0000-0002-3778-4633"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roberto Togneri","raw_affiliation_strings":["School of Electrical, Electronic and Computer Engineering"],"affiliations":[{"raw_affiliation_string":"School of Electrical, Electronic and Computer Engineering","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023874572","display_name":"Sven Nordholm","orcid":"https://orcid.org/0000-0001-8942-5328"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sven Nordholm","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5029037494"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.6192,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.82788639,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"918","last_page":"921"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7633013725280762},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5862550139427185},{"id":"https://openalex.org/keywords/short-time-fourier-transform","display_name":"Short-time Fourier transform","score":0.5847070813179016},{"id":"https://openalex.org/keywords/blind-signal-separation","display_name":"Blind signal separation","score":0.5586637258529663},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48993152379989624},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.48791787028312683},{"id":"https://openalex.org/keywords/deconvolution","display_name":"Deconvolution","score":0.4612841010093689},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.4550716280937195},{"id":"https://openalex.org/keywords/fourier-transform","display_name":"Fourier transform","score":0.3746655583381653},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.351667582988739},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12965714931488037}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7633013725280762},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5862550139427185},{"id":"https://openalex.org/C166386157","wikidata":"https://www.wikidata.org/wiki/Q1477735","display_name":"Short-time Fourier transform","level":4,"score":0.5847070813179016},{"id":"https://openalex.org/C120317606","wikidata":"https://www.wikidata.org/wiki/Q17105967","display_name":"Blind signal separation","level":3,"score":0.5586637258529663},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48993152379989624},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.48791787028312683},{"id":"https://openalex.org/C174576160","wikidata":"https://www.wikidata.org/wiki/Q1183700","display_name":"Deconvolution","level":2,"score":0.4612841010093689},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.4550716280937195},{"id":"https://openalex.org/C102519508","wikidata":"https://www.wikidata.org/wiki/Q6520159","display_name":"Fourier transform","level":2,"score":0.3746655583381653},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.351667582988739},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12965714931488037},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C203024314","wikidata":"https://www.wikidata.org/wiki/Q1365258","display_name":"Fourier analysis","level":3,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2007-332","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2007-332","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2007","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:publications/4b5e9bac-e2d4-4e67-82ae-feb1c343722a","is_oa":false,"landing_page_url":"https://research-repository.uwa.edu.au/en/publications/4b5e9bac-e2d4-4e67-82ae-feb1c343722a","pdf_url":null,"source":{"id":"https://openalex.org/S4306402523","display_name":"UWA Profiles and Research Repository (University of Western Australia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I177877127","host_organization_name":"The University of Western Australia","host_organization_lineage":["https://openalex.org/I177877127"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Kuhne, M, Togneri, R & Nordholm, S E 2007, Smooth Soft Mel-Spectrographic Masks Based on Blind Sparse Source Separation. in D Van Compernolle & L Boves (eds), Proceedings of Interspeech 2007. Antwerp, Belgium edn, vol. CD Rom, International Speech Communication Association, Universitat Bonn, Bonn, Germany, pp. 918-921.","raw_type":"contributionToPeriodical"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.44999998807907104,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W88081813","https://openalex.org/W1485014097","https://openalex.org/W1651266332","https://openalex.org/W1987906574","https://openalex.org/W2025817983","https://openalex.org/W2120962571"],"related_works":["https://openalex.org/W1509813908","https://openalex.org/W2031820693","https://openalex.org/W3024816962","https://openalex.org/W4225639054","https://openalex.org/W973023320","https://openalex.org/W1910172735","https://openalex.org/W2118307209","https://openalex.org/W2107364365","https://openalex.org/W1967434260","https://openalex.org/W2107688514"],"abstract_inverted_index":{"Abstract":[0],"This":[1,246],"paper":[2,247],"investigates":[3],"the":[4,22,75,80,83,91,118,130,147,170,213,242,341],"use":[5,176],"of":[6,82,90,108,120,300,343],"DUET,":[7],"a":[8,36,40,70,87,157,191,194,199,225],"recently":[9,113],"proposedblind":[10],"source":[11,92,237],"separation":[12,123],"method,":[13],"as":[14,139,179,346],"front-end":[15],"for":[16,66,229,268],"missing":[17,99,200],"dataspeech":[18],"recognition.":[19],"Based":[20],"on":[21,312],"attenuation":[23],"and":[24,239],"delay":[25,102],"estima-tion":[26],"in":[27,50,117,212,252,303],"stereo":[28],"signals":[29],"soft":[30,260,297,301,313],"time-frequency":[31,109],"masks":[32,77,244,258,261,302,318],"are":[33,305],"designedto":[34],"extract":[35],"target":[37],"speaker":[38],"from":[39],"mixture":[41],"containing":[42],"multiplespeech":[43],"sources.":[44],"A":[45,333],"postprocessing":[46,326],"step":[47],"is":[48,149],"introduced":[49],"order":[51],"toremove":[52],"isolated":[53],"mask":[54,325],"points":[55],"that":[56,74,289],"can":[57,235,282,290,327],"cause":[58],"insertion":[59],"errors":[60],"inthe":[61],"speech":[62,97,182,203,270],"decoder.":[63],"The":[64,106,160,205],"results":[65],"connected":[67],"digit":[68],"experimentsin":[69],"multi-speaker":[71],"environment":[72],"demonstrate":[73],"proposedsoft":[76],"closely":[78],"match":[79],"performance":[81],"oracle":[84],"maskdesigned":[85],"with":[86,134,259],"priori":[88],"knowledge":[89],"spectra.":[93],"Index":[94],"Terms":[95],":":[96],"recognition,":[98],"data,":[100],"attenuationand":[101],"estimation":[103],"1.":[104],"Introduction":[105],"concept":[107],"(TF)":[110],"masking":[111],"has":[112,129],"at-tracted":[114],"some":[115,294],"interest":[116],"\ufb01eld":[119],"blind":[121],"signal":[122],"(BSS)[1,":[124],"2].":[125],"Demixing":[126],"via":[127],"TF-masks":[128,211],"potential":[131],"to":[132,156,175,209,224,265,284,293,329,339],"separatemixtures":[133],"more":[135,307],"sources":[136],"than":[137],"sensors":[138],"it":[140],"does":[141],"not":[142],"rely":[143],"onmatrix":[144],"inversion.":[145],"Instead":[146],"TF-plane":[148],"partitioned":[150],"into":[151,169],"dis-joint":[152],"regions":[153],"each":[154,166],"assigned":[155],"particular":[158],"source.":[159],"sourcesare":[161],"then":[162],"recovered":[163],"by":[164,296],"converting":[165,219],"region":[167],"back":[168],"timedomain.":[171],"It":[172],"seems":[173],"promising":[174],"BSS":[177,195],"systems":[178],"front-endsfor":[180],"automatic":[181],"recognition":[183,271],"(ASR).":[184],"In":[185,231],"[3]":[186],"we":[187,234,255,321],"have":[188,278],"pro-posed":[189],"such":[190],"combination":[192],"using":[193],"technique":[196],"called":[197],"DUETand":[198],"data":[201],"(MD)":[202],"recognizer.":[204],"proposedsystem":[206],"uses":[207],"DUET":[208],"estimate":[210],"sparse":[214],"Short-Time-Fourier-Transform":[215],"(STFT)":[216],"domain":[217],"before":[218],"thehigh":[220],"STFT":[221],"frequency":[222],"resolution":[223],"perceptual":[226],"mel-frequencyscale":[227],"suitable":[228],"ASR.":[230],"this":[232],"way":[233],"avoid":[236],"re-construction":[238],"directly":[240],"exploit":[241],"spectrographic":[243],"forMD-ASR.":[245],"extends":[248],"our":[249],"previous":[250],"work":[251],"two":[253],"regards.Firstly,":[254],"replace":[256],"binary":[257],"which":[262],"havebeen":[263],"proven":[264],"be":[266,291],"bene\ufb01cial":[267],"both":[269],"andspeech":[272],"enhancement.":[273],"Several":[274],"studies":[275],"[2,":[276],"4]":[277],"reported":[279],"thatbinary":[280],"TF-masking":[281],"lead":[283,328],"audible":[285],"unnatural":[286],"sound":[287],"arti-facts":[288],"avoided":[292],"degree":[295],"masks.":[298],"Theadvantages":[299],"MD-ASR":[304],"even":[306],"evidentas":[308],"marginalization":[309],"approaches":[310],"based":[311],"decisions":[314],"consis-tently":[315],"outperformed":[316],"hard":[317],"[5].":[319],"Secondly,":[320],"show":[322],"thata":[323],"simple":[324],"substantial":[330],"recogni-tion":[331],"improvements.":[332],"two-dimensional":[334],"(2-D)":[335],"median":[336],"\ufb01lter":[337],"wasapplied":[338],"reduce":[340],"in\ufb02uence":[342],"outliers":[344],"visible":[345],"scattered":[347]},"counts_by_year":[{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
