{"id":"https://openalex.org/W2769025471","doi":"https://doi.org/10.1109/asru.2017.8268938","title":"Unsupervised adaptation with domain separation networks for robust speech recognition","display_name":"Unsupervised adaptation with domain separation networks for robust speech recognition","publication_year":2017,"publication_date":"2017-12-01","ids":{"openalex":"https://openalex.org/W2769025471","doi":"https://doi.org/10.1109/asru.2017.8268938","mag":"2769025471"},"language":"en","primary_location":{"id":"doi:10.1109/asru.2017.8268938","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2017.8268938","pdf_url":null,"source":{"id":"https://openalex.org/S4306498158","display_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1711.08010","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101749753","display_name":"Zhong Meng","orcid":"https://orcid.org/0000-0001-7814-5929"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhong Meng","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, GA","Microsoft AI and Research, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, GA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Microsoft AI and Research, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100345092","display_name":"Zhuo Chen","orcid":"https://orcid.org/0000-0002-9011-7928"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhuo Chen","raw_affiliation_strings":["Microsoft AI and Research, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft AI and Research, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042972227","display_name":"Vadim Mazalov","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vadim Mazalov","raw_affiliation_strings":["Microsoft AI and Research, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft AI and Research, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100365053","display_name":"Jinyu Li","orcid":"https://orcid.org/0000-0002-1089-9748"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinyu Li","raw_affiliation_strings":["Microsoft AI and Research, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft AI and Research, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077401426","display_name":"Yifan Gong","orcid":"https://orcid.org/0000-0001-8786-3391"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yifan Gong","raw_affiliation_strings":["Microsoft AI and Research, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft AI and Research, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101749753"],"corresponding_institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":8.9311,"has_fulltext":false,"cited_by_count":70,"citation_normalized_percentile":{"value":0.98178739,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"214","last_page":"221"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7482147216796875},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6106441617012024},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6082512140274048},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5686340928077698},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5615778565406799},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.48209479451179504},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4813852608203888},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4466012418270111},{"id":"https://openalex.org/keywords/domain-adaptation","display_name":"Domain adaptation","score":0.43080201745033264},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4140719175338745},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4104998707771301},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3456904888153076}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7482147216796875},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6106441617012024},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6082512140274048},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5686340928077698},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5615778565406799},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.48209479451179504},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4813852608203888},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4466012418270111},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.43080201745033264},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4140719175338745},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4104998707771301},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3456904888153076},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/asru.2017.8268938","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2017.8268938","pdf_url":null,"source":{"id":"https://openalex.org/S4306498158","display_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1711.08010","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1711.08010","pdf_url":"https://arxiv.org/pdf/1711.08010","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1711.08010","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1711.08010","pdf_url":"https://arxiv.org/pdf/1711.08010","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7400000095367432,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1882958252","https://openalex.org/W1961429348","https://openalex.org/W1984541135","https://openalex.org/W1989549063","https://openalex.org/W1992475611","https://openalex.org/W2010362084","https://openalex.org/W2013598660","https://openalex.org/W2056738732","https://openalex.org/W2076794394","https://openalex.org/W2080005694","https://openalex.org/W2099471712","https://openalex.org/W2160306971","https://openalex.org/W2160815625","https://openalex.org/W2181607856","https://openalex.org/W2239847623","https://openalex.org/W2289394825","https://openalex.org/W2289731793","https://openalex.org/W2293634267","https://openalex.org/W2294543795","https://openalex.org/W2296748324","https://openalex.org/W2394882406","https://openalex.org/W2394932179","https://openalex.org/W2402040300","https://openalex.org/W2407793339","https://openalex.org/W2510867321","https://openalex.org/W2511131004","https://openalex.org/W2584667682","https://openalex.org/W2587088898","https://openalex.org/W2636483419","https://openalex.org/W2641129314","https://openalex.org/W2735006420","https://openalex.org/W2749784707","https://openalex.org/W2953127297","https://openalex.org/W2962894366","https://openalex.org/W2963826681","https://openalex.org/W2964182776","https://openalex.org/W3112742522","https://openalex.org/W3125118953","https://openalex.org/W4320013936","https://openalex.org/W6639480849","https://openalex.org/W6639822467","https://openalex.org/W6686045668","https://openalex.org/W6713823255","https://openalex.org/W6725448924","https://openalex.org/W6732862412"],"related_works":["https://openalex.org/W4312246223","https://openalex.org/W3119773509","https://openalex.org/W3208297503","https://openalex.org/W2889153461","https://openalex.org/W2964117661","https://openalex.org/W4388405611","https://openalex.org/W2619127353","https://openalex.org/W3048601286","https://openalex.org/W2400428875","https://openalex.org/W2965925734"],"abstract_inverted_index":{"Unsupervised":[0],"domain":[1,66,100,110,162],"adaptation":[2,171],"of":[3,28,58,65,76,108,135,137],"speech":[4,150,192],"signal":[5],"aims":[6],"at":[7],"adapting":[8],"a":[9,81,112,159,185],"well-trained":[10],"source-domain":[11],"acoustic":[12,33],"model":[13],"to":[14,35,52,122,146,167],"the":[15,48,55,62,85,93,96,105,126,133,138,148,153,168,180],"unlabeled":[16],"data":[17],"from":[18,152,179],"target":[19,99],"domain.":[20],"This":[21,161],"can":[22],"be":[23,123],"achieved":[24,174],"by":[25,102],"adversarial":[26,69,187],"training":[27,188],"deep":[29,39],"neural":[30],"network":[31],"(DNN)":[32],"models":[34],"learn":[36],"an":[37],"intermediate":[38],"representation":[40],"that":[41],"is":[42,50,120,144,165],"both":[43],"senone-discriminative":[44],"and":[45,61,98,129,155,173],"domain-invariant.":[46],"Specifically,":[47],"DNN":[49,143],"trained":[51,121],"jointly":[53],"optimize":[54],"primary":[56],"task":[57,64,172],"senone":[59],"classification":[60,67],"secondary":[63],"with":[68,125],"objective":[70],"functions.":[71],"In":[72],"this":[73],"work,":[74],"instead":[75],"only":[77],"focusing":[78],"on":[79,194],"learning":[80],"domain-invariant":[82],"feature":[83,151],"(i.e.":[84],"shared":[86,127,139,156],"component":[87,107,114,119,128],"between":[88,95],"domains),":[89],"we":[90],"also":[91],"characterize":[92],"difference":[94],"source":[97],"distributions":[101],"explicitly":[103],"modeling":[104],"private":[106,113,118,154],"each":[109],"through":[111],"extractor":[115],"DNN.":[116],"The":[117],"orthogonal":[124],"thus":[130],"implicitly":[131],"increases":[132],"degree":[134],"domain-invariance":[136],"component.":[140],"A":[141],"reconstructor":[142],"used":[145],"reconstruct":[147],"original":[149],"components":[157],"as":[158],"regularization.":[160],"separation":[163],"framework":[164],"applied":[166],"unsupervised":[169],"environment":[170],"11.08%":[175],"relative":[176],"WER":[177],"reduction":[178],"gradient":[181],"reversal":[182],"layer":[183],"training,":[184],"representative":[186],"method,":[189],"for":[190],"automatic":[191],"recognition":[193],"CHiME-3":[195],"dataset.":[196]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":17},{"year":2020,"cited_by_count":18},{"year":2019,"cited_by_count":11},{"year":2018,"cited_by_count":14}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
