{"id":"https://openalex.org/W2964058423","doi":"https://doi.org/10.1109/icassp.2018.8462169","title":"Deep Factorization for Speech Signal","display_name":"Deep Factorization for Speech Signal","publication_year":2018,"publication_date":"2018-04-01","ids":{"openalex":"https://openalex.org/W2964058423","doi":"https://doi.org/10.1109/icassp.2018.8462169","mag":"2964058423"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2018.8462169","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462169","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050701255","display_name":"Lantian Li","orcid":"https://orcid.org/0000-0003-4274-7930"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lantian Li","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100391494","display_name":"Dong Wang","orcid":"https://orcid.org/0000-0002-6992-7950"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dong Wang","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087953918","display_name":"Yixiang Chen","orcid":"https://orcid.org/0000-0001-8596-7963"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yixiang Chen","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102001805","display_name":"Ying Shi","orcid":"https://orcid.org/0000-0001-8445-2300"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Shi","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056437232","display_name":"Zhiyuan Tang","orcid":"https://orcid.org/0000-0002-3786-7690"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyuan Tang","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084318285","display_name":"Thomas Fang Zheng","orcid":"https://orcid.org/0000-0002-0249-4767"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Thomas Fang Zheng","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5050701255"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":3.2577,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.93799375,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5094","last_page":"5098"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8079719543457031},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.6941860914230347},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6766764521598816},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5527353882789612},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.478362500667572},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4476281702518463},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.4463127851486206},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4401305913925171},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.42441898584365845},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3694179654121399},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.156462162733078}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8079719543457031},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.6941860914230347},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6766764521598816},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5527353882789612},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.478362500667572},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4476281702518463},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.4463127851486206},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4401305913925171},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.42441898584365845},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3694179654121399},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.156462162733078},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2018.8462169","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462169","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W330298975","https://openalex.org/W629186223","https://openalex.org/W1524333225","https://openalex.org/W1589137271","https://openalex.org/W2030067664","https://openalex.org/W2039057510","https://openalex.org/W2043108680","https://openalex.org/W2074788634","https://openalex.org/W2107638917","https://openalex.org/W2150769028","https://openalex.org/W2160815625","https://openalex.org/W2165698076","https://openalex.org/W2183016404","https://openalex.org/W2401812832","https://openalex.org/W2491899193","https://openalex.org/W2535097731","https://openalex.org/W2566925314","https://openalex.org/W2962832278","https://openalex.org/W2962993426","https://openalex.org/W2963522845","https://openalex.org/W2964269671","https://openalex.org/W4210849719","https://openalex.org/W4239943352","https://openalex.org/W6631362777","https://openalex.org/W6712730384","https://openalex.org/W6728849968","https://openalex.org/W6737563559"],"related_works":["https://openalex.org/W2794559785","https://openalex.org/W3203142394","https://openalex.org/W4375867731","https://openalex.org/W4302615923","https://openalex.org/W1974101135","https://openalex.org/W2351061015","https://openalex.org/W2950281908","https://openalex.org/W1754499339","https://openalex.org/W2013873776","https://openalex.org/W2017509870"],"abstract_inverted_index":{"Various":[0],"informative":[1,28],"factors":[2,89,95],"mixed":[3],"in":[4,81,90,156],"speech":[5,24,88,116,124,143],"signals,":[6,117],"leading":[7],"to":[8,21,34,47],"great":[9],"difficulty":[10],"when":[11,103],"decoding":[12],"any":[13],"of":[14],"the":[15,122,157],"factors.":[16,106],"An":[17],"intuitive":[18],"idea":[19],"is":[20],"factorize":[22,115],"each":[23],"frame":[25],"into":[26],"individual":[27],"factors,":[29,121],"though":[30],"it":[31],"turns":[32],"out":[33],"be":[35,48,58,79,127,154],"highly":[36],"difficult.":[37],"Recently,":[38],"we":[39],"found":[40],"that":[41,77,110],"speaker":[42,147],"traits,":[43],"which":[44],"were":[45],"assumed":[46],"long-term":[49],"distributional":[50],"properties,":[51],"are":[52,98],"actually":[53],"short-time":[54],"patterns,":[55],"and":[56,118,135,149],"can":[57,113,126],"learned":[59],"by":[60],"a":[61,71,91,130],"carefully":[62],"designed":[63],"deep":[64,73],"neural":[65],"network":[66],"(DNN).":[67],"This":[68,133],"discovery":[69],"motivated":[70],"cascade":[72],"factorization":[74,134],"(CDF)":[75],"framework":[76,86],"will":[78,108,153],"presented":[80],"this":[82,111],"paper.":[83,158],"The":[84],"proposed":[85],"infers":[87],"sequential":[92],"way,":[93],"where":[94],"previously":[96],"inferred":[97],"used":[99],"as":[100,152],"conditional":[101],"variables":[102],"inferring":[104],"other":[105],"We":[107],"show":[109],"approach":[112,137],"effectively":[114],"using":[119],"these":[120],"original":[123],"spectrum":[125],"recovered":[128],"with":[129],"high":[131],"accuracy.":[132],"reconstruction":[136],"provides":[138],"potential":[139],"values":[140],"for":[141],"many":[142],"processing":[144],"tasks,":[145],"e.g.,":[146],"recognition":[148],"emotion":[150],"recognition,":[151],"demonstrated":[155]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":8},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
