{"id":"https://openalex.org/W4210481535","doi":"https://doi.org/10.1109/asru51503.2021.9688090","title":"Far-Field Speech Recognition Based on Complex-Valued Neural Networks and Inter-Frame Similarity Difference Method","display_name":"Far-Field Speech Recognition Based on Complex-Valued Neural Networks and Inter-Frame Similarity Difference Method","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W4210481535","doi":"https://doi.org/10.1109/asru51503.2021.9688090"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9688090","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688090","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026759570","display_name":"Yifan Guo","orcid":"https://orcid.org/0000-0002-9700-5005"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yifan Guo","raw_affiliation_strings":["Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Speech Acoustics and Content Understanding","University of Chinese Academy of Sciences","Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Speech Acoustics and Content Understanding","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100405158","display_name":"Yifan Chen","orcid":"https://orcid.org/0009-0002-6946-7699"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifan Chen","raw_affiliation_strings":["Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Speech Acoustics and Content Understanding","Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences","University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Speech Acoustics and Content Understanding","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072887461","display_name":"Gaofeng Cheng","orcid":"https://orcid.org/0000-0002-2102-6061"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gaofeng Cheng","raw_affiliation_strings":["Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Speech Acoustics and Content Understanding","Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Speech Acoustics and Content Understanding","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036446253","display_name":"Pengyuan Zhang","orcid":"https://orcid.org/0000-0001-6838-5160"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengyuan Zhang","raw_affiliation_strings":["Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Speech Acoustics and Content Understanding","Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences","University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Speech Acoustics and Content Understanding","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100425112","display_name":"Yonghong Yan","orcid":"https://orcid.org/0000-0001-6907-5770"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yonghong Yan","raw_affiliation_strings":["Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Speech Acoustics and Content Understanding","Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences","University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics, Chinese Academy of Sciences,Key Laboratory of Speech Acoustics and Content Understanding","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Key Laboratory of Speech Acoustics and Content Understanding, Institute of Acoustics, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5026759570"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210099069","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.3441,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.59234385,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1003","last_page":"1010"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7399387955665588},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6679967641830444},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6416807174682617},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5702911019325256},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5445752739906311},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5012257099151611},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.435769259929657},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4244532287120819},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.410347580909729},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40512531995773315},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37670260667800903},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.23861846327781677},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14557012915611267},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08239269256591797}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7399387955665588},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6679967641830444},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6416807174682617},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5702911019325256},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5445752739906311},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5012257099151611},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.435769259929657},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4244532287120819},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.410347580909729},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40512531995773315},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37670260667800903},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.23861846327781677},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14557012915611267},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08239269256591797},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru51503.2021.9688090","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688090","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.550000011920929,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G6258922347","display_name":null,"funder_award_id":"62071461,11774380","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W87496662","https://openalex.org/W1494198834","https://openalex.org/W1980993072","https://openalex.org/W2039553327","https://openalex.org/W2066218102","https://openalex.org/W2117678320","https://openalex.org/W2168729028","https://openalex.org/W2219249508","https://openalex.org/W2288217446","https://openalex.org/W2398042854","https://openalex.org/W2589857635","https://openalex.org/W2696967604","https://openalex.org/W2766219058","https://openalex.org/W2921496354","https://openalex.org/W2954695182","https://openalex.org/W2963045393","https://openalex.org/W2977305403","https://openalex.org/W2981627360","https://openalex.org/W3003441391","https://openalex.org/W3015527782","https://openalex.org/W3015651375","https://openalex.org/W3035661013","https://openalex.org/W3096073522","https://openalex.org/W3140109466","https://openalex.org/W3209141406","https://openalex.org/W4206319965","https://openalex.org/W4249052411","https://openalex.org/W4297779827","https://openalex.org/W4385245566","https://openalex.org/W6688816777","https://openalex.org/W6735168207","https://openalex.org/W6771497439","https://openalex.org/W6774550102","https://openalex.org/W6778073312","https://openalex.org/W6802983977"],"related_works":["https://openalex.org/W1630865680","https://openalex.org/W2375480909","https://openalex.org/W2353314428","https://openalex.org/W2373767407","https://openalex.org/W2012019886","https://openalex.org/W2770665941","https://openalex.org/W3096184950","https://openalex.org/W4231424160","https://openalex.org/W2275432853","https://openalex.org/W197907117"],"abstract_inverted_index":{"Far-field":[0],"automatic":[1],"speech":[2,39],"recognition":[3],"(ASR)":[4],"is":[5],"a":[6,22,32,42,58,122,162,170],"challenging":[7],"task":[8],"due":[9],"to":[10,62,100,134],"the":[11,64,71,83,94,106,109,113,116,127,131,141,148,166,174],"background":[12],"noise":[13],"and":[14,156,169],"reverberation.":[15],"To":[16],"address":[17],"this":[18],"issue,":[19],"we":[20,30,46,89],"introduce":[21],"novel":[23],"end-to-end":[24],"multi-channel":[25],"far-field":[26],"ASR":[27,110],"architecture.":[28],"First,":[29],"use":[31],"complex-valued":[33,123],"CNN":[34],"based":[35],"architecture":[36],"designed":[37],"for":[38,126],"tasks":[40],"as":[41,105],"neural":[43,65,117,132],"beamformer.":[44,118],"Second,":[45],"propose":[47,90],"an":[48,91],"auxiliary":[49],"mod-ule":[50],"called":[51,93],"absolute":[52],"position":[53,59],"regression":[54],"module":[55,125],"(APRM)":[56],"with":[57],"prediction":[60],"loss":[61],"help":[63],"beamformer":[66,133],"be":[67],"better":[68],"aware":[69],"of":[70,74,86,108,115,130],"corresponding":[72],"frequencies":[73],"each":[75,136],"input":[76],"time-frequency":[77],"(T-F)":[78],"bin.":[79],"Third,":[80],"inspired":[81],"by":[82],"short-term":[84],"stationarity":[85],"human":[87],"speech,":[88],"approach":[92],"Inter-Frame":[95],"Similarity":[96],"Difference":[97],"(IFSD)":[98],"method":[99],"au-tomatically":[101],"select":[102],"useful":[103],"channels":[104,129],"inputs":[107],"backend":[111],"from":[112,144],"outputs":[114,143],"We":[119],"also":[120],"implement":[121],"attention":[124],"output":[128],"utilize":[135],"other's":[137],"in-formation,":[138],"thereby":[139],"preventing":[140],"final":[142],"information":[145],"loss.":[146],"With":[147],"above":[149],"innovations,":[150],"our":[151],"proposed":[152],"model":[153],"achieves":[154],"9.7%":[155],"11.1%":[157],"relative":[158],"WER":[159],"reductions":[160],"over":[161],"DNN-MVDR":[163],"baseline":[164],"on":[165],"CHiME4":[167],"dataset":[168,171],"simulated":[172],"using":[173],"Librispeech":[175],"corpus.":[176]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
