{"id":"https://openalex.org/W2885724687","doi":"https://doi.org/10.1109/slt.2018.8639575","title":"Learning Noise-Invariant Representations for Robust Speech Recognition","display_name":"Learning Noise-Invariant Representations for Robust Speech Recognition","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2885724687","doi":"https://doi.org/10.1109/slt.2018.8639575","mag":"2885724687"},"language":"en","primary_location":{"id":"doi:10.1109/slt.2018.8639575","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2018.8639575","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112447939","display_name":"Davis Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Davis Liang","raw_affiliation_strings":["Amazon AI, Palo Alto, CA"],"affiliations":[{"raw_affiliation_string":"Amazon AI, Palo Alto, CA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011349177","display_name":"Zhiheng Huang","orcid":"https://orcid.org/0000-0001-5982-9778"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhiheng Huang","raw_affiliation_strings":["Amazon AI, Palo Alto, CA"],"affiliations":[{"raw_affiliation_string":"Amazon AI, Palo Alto, CA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029448258","display_name":"Zachary C. Lipton","orcid":"https://orcid.org/0000-0002-3824-4241"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zachary C. Lipton","raw_affiliation_strings":["Amazon AI, Palo Alto, CA"],"affiliations":[{"raw_affiliation_string":"Amazon AI, Palo Alto, CA","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5112447939"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":5.0767,"has_fulltext":false,"cited_by_count":45,"citation_normalized_percentile":{"value":0.96218888,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"56","last_page":"63"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6573466062545776},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.6539578437805176},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.634515643119812},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5940539240837097},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5489110350608826},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.45610031485557556},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4290686249732971},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4245326817035675},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41075998544692993},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.26950228214263916}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6573466062545776},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.6539578437805176},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.634515643119812},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5940539240837097},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5489110350608826},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.45610031485557556},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4290686249732971},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4245326817035675},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41075998544692993},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.26950228214263916},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt.2018.8639575","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2018.8639575","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.800000011920929,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320338335","display_name":"H2020 European Research Council","ror":"https://ror.org/0472cxd90"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1599512239","https://openalex.org/W1731081199","https://openalex.org/W1736701665","https://openalex.org/W1828163288","https://openalex.org/W1902237438","https://openalex.org/W1922655562","https://openalex.org/W1964929969","https://openalex.org/W1992475611","https://openalex.org/W2022799064","https://openalex.org/W2062164080","https://openalex.org/W2067117291","https://openalex.org/W2096653978","https://openalex.org/W2111406701","https://openalex.org/W2124136621","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2141520175","https://openalex.org/W2143612262","https://openalex.org/W2148154194","https://openalex.org/W2151899309","https://openalex.org/W2160687178","https://openalex.org/W2163605009","https://openalex.org/W2193413348","https://openalex.org/W2219249508","https://openalex.org/W2290318471","https://openalex.org/W2503204202","https://openalex.org/W2520160253","https://openalex.org/W2584667682","https://openalex.org/W2616030108","https://openalex.org/W2625954004","https://openalex.org/W2765920929","https://openalex.org/W2791953061","https://openalex.org/W2799800213","https://openalex.org/W2962778134","https://openalex.org/W2962826786","https://openalex.org/W2963211739","https://openalex.org/W2963403868","https://openalex.org/W2963850025","https://openalex.org/W2964308564","https://openalex.org/W4385245566","https://openalex.org/W6629717138","https://openalex.org/W6637618735","https://openalex.org/W6638749077","https://openalex.org/W6640090968","https://openalex.org/W6648656139","https://openalex.org/W6656140581","https://openalex.org/W6667544397","https://openalex.org/W6676562027","https://openalex.org/W6679434410","https://openalex.org/W6679436768","https://openalex.org/W6682812381","https://openalex.org/W6684191040","https://openalex.org/W6687566353","https://openalex.org/W6688816777","https://openalex.org/W6696830301","https://openalex.org/W6727336983","https://openalex.org/W6732862412","https://openalex.org/W6739901393","https://openalex.org/W6745640171","https://openalex.org/W6748965907","https://openalex.org/W6752048366"],"related_works":["https://openalex.org/W2062195135","https://openalex.org/W2795079307","https://openalex.org/W2793058541","https://openalex.org/W3118638206","https://openalex.org/W1983629434","https://openalex.org/W4324271173","https://openalex.org/W1967645776","https://openalex.org/W2352227742","https://openalex.org/W4390679071","https://openalex.org/W1480923359"],"abstract_inverted_index":{"Despite":[0],"rapid":[1],"advances":[2],"in":[3],"speech":[4],"recognition,":[5],"current":[6],"models":[7,31],"remain":[8],"brittle":[9],"to":[10,13,43,69,77,104,174],"superficial":[11],"perturbations":[12],"their":[14],"inputs.":[15],"Small":[16],"amounts":[17],"of":[18,24],"noise":[19,150],"can":[20],"destroy":[21],"the":[22,44,49,70,78,120,124,178],"performance":[23],"an":[25],"otherwise":[26],"state-of-the-art":[27],"model.":[28],"To":[29],"harden":[30],"against":[32],"background":[33],"noise,":[34],"practitioners":[35],"often":[36],"perform":[37],"data":[38],"augmentation,":[39],"adding":[40],"artificially-noised":[41],"examples":[42],"training":[45,87,91],"set,":[46],"carrying":[47],"over":[48],"original":[50],"label.":[51],"In":[52],"this":[53],"paper,":[54],"we":[55,93],"hypothesize":[56],"that":[57,167],"a":[58,95,101],"clean":[59],"example":[60],"and":[61,140],"its":[62],"superficially":[63],"perturbed":[64],"counterparts":[65],"shouldn't":[66],"merely":[67],"map":[68,76],"same":[71,79],"class":[72],"-":[73],"they":[74],"should":[75],"representation.":[80],"We":[81,98],"propose":[82],"invariant-representation-learning":[83],"(IRL):":[84],"At":[85],"each":[86,90,109],"iteration,":[88],"for":[89],"example,":[92],"sample":[94],"noisy":[96],"counterpart.":[97],"then":[99],"apply":[100],"penalty":[102],"term":[103],"coerce":[105],"matched":[106],"representations":[107],"at":[108,177],"layer":[110],"(above":[111],"some":[112],"chosen":[113,179],"layer).":[114],"Our":[115],"key":[116],"results,":[117],"demonstrated":[118],"on":[119,134,148],"LibriSpeech":[121],"dataset":[122],"are":[123,160,170],"following:":[125],"(i)":[126],"IRL":[127],"significantly":[128],"reduces":[129],"character":[130],"error":[131],"rates":[132],"(CER)":[133],"both":[135],"`clean'":[136],"(3.3%":[137],"vs":[138,143],"6.5%)":[139],"`other'":[141],"(11.0%":[142],"18.1%)":[144],"test":[145],"sets;":[146],"(ii)":[147],"several-of-domain":[149],"settings":[151],"(different":[152],"from":[153],"those":[154],"seen":[155],"during":[156],"training)":[157],"IRL's":[158],"benefits":[159],"even":[161],"more":[162],"pronounced.":[163],"Careful":[164],"ablations":[165],"confirm":[166],"our":[168],"results":[169],"not":[171],"simply":[172],"due":[173],"shrinking":[175],"activations":[176],"layers.":[180]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":9}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
