{"id":"https://openalex.org/W2807947437","doi":"https://doi.org/10.21437/interspeech.2018-1097","title":"Unsupervised Adaptation with Interpretable Disentangled Representations for Distant Conversational Speech Recognition","display_name":"Unsupervised Adaptation with Interpretable Disentangled Representations for Distant Conversational Speech Recognition","publication_year":2018,"publication_date":"2018-08-28","ids":{"openalex":"https://openalex.org/W2807947437","doi":"https://doi.org/10.21437/interspeech.2018-1097","mag":"2807947437"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2018-1097","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2018-1097","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1806.04872","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051950818","display_name":"Wei-Ning Hsu","orcid":"https://orcid.org/0000-0001-5546-5217"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wei-Ning Hsu","raw_affiliation_strings":["Massachusetts Institute of Technology, Cambridge, United States"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, United States","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100662187","display_name":"Hao Tang","orcid":"https://orcid.org/0000-0002-2445-2605"},"institutions":[{"id":"https://openalex.org/I37802460","display_name":"Northwest University","ror":"https://ror.org/00z3td547","country_code":"CN","type":"education","lineage":["https://openalex.org/I37802460"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Tang","raw_affiliation_strings":["Northwest University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Northwest University, Xi'an, China","institution_ids":["https://openalex.org/I37802460"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112758056","display_name":"James Glass","orcid":"https://orcid.org/0000-0002-3097-360X"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Glass","raw_affiliation_strings":["Massachusetts Institute of Technology, Cambridge, United States"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, United States","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5051950818"],"corresponding_institution_ids":["https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":0.6769,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.77210735,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1576","last_page":"1580"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7503232359886169},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7322264313697815},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.6322278380393982},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5398229360580444},{"id":"https://openalex.org/keywords/domain-adaptation","display_name":"Domain adaptation","score":0.5360169410705566},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.5295522809028625},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5229190587997437},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5047131776809692},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.48818808794021606},{"id":"https://openalex.org/keywords/latent-variable","display_name":"Latent variable","score":0.4476010799407959},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.43842217326164246},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.42905810475349426},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.41363513469696045},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39390629529953003},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08995366096496582}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7503232359886169},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7322264313697815},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.6322278380393982},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5398229360580444},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.5360169410705566},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.5295522809028625},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5229190587997437},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5047131776809692},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.48818808794021606},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.4476010799407959},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.43842217326164246},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.42905810475349426},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.41363513469696045},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39390629529953003},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08995366096496582},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/interspeech.2018-1097","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2018-1097","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1806.04872","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1806.04872","pdf_url":"https://arxiv.org/pdf/1806.04872","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2807947437","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1806.04872.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1806.04872","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1806.04872","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1806.04872","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1806.04872","pdf_url":"https://arxiv.org/pdf/1806.04872","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2807947437.pdf","grobid_xml":"https://content.openalex.org/works/W2807947437.grobid-xml"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W1909320841","https://openalex.org/W2002342963","https://openalex.org/W2057653135","https://openalex.org/W2083751884","https://openalex.org/W2094147890","https://openalex.org/W2099621636","https://openalex.org/W2181607856","https://openalex.org/W2239847623","https://openalex.org/W2293634267","https://openalex.org/W2402144811","https://openalex.org/W2407080277","https://openalex.org/W2512209472","https://openalex.org/W2587088898","https://openalex.org/W2622203030","https://openalex.org/W2625327772","https://openalex.org/W2757672955","https://openalex.org/W2769025471","https://openalex.org/W2951004968","https://openalex.org/W2962793481","https://openalex.org/W2962850167","https://openalex.org/W2963417023","https://openalex.org/W2963618559","https://openalex.org/W2963917928","https://openalex.org/W2964084166","https://openalex.org/W2964245029"],"related_works":["https://openalex.org/W3111966420","https://openalex.org/W2520436654","https://openalex.org/W2972158108","https://openalex.org/W2896591327","https://openalex.org/W2900553892","https://openalex.org/W2810453092","https://openalex.org/W3034765384","https://openalex.org/W2903457322","https://openalex.org/W2797433568","https://openalex.org/W2962835731","https://openalex.org/W2916681389","https://openalex.org/W1984650017","https://openalex.org/W3204178525","https://openalex.org/W3213875247","https://openalex.org/W2996946635","https://openalex.org/W2951855756","https://openalex.org/W113837456","https://openalex.org/W3192693288","https://openalex.org/W2794887779","https://openalex.org/W2998869886"],"abstract_inverted_index":{"The":[0],"current":[1],"trend":[2],"in":[3],"automatic":[4],"speech":[5,97],"recognition":[6],"is":[7,39,149,164],"to":[8,15,30,42,56,71],"leverage":[9],"large":[10,44],"amounts":[11,45],"of":[12,28,46,96,147],"labeled":[13,73,84,115],"data":[14,23,48,74,82],"train":[16,31],"supervised":[17],"neural":[18],"network":[19],"models.":[20],"Unfortunately,":[21],"obtaining":[22],"for":[24,75,158],"a":[25,64,114,140],"wide":[26],"range":[27],"domains":[29,50],"robust":[32],"models":[33,55,181],"can":[34],"be":[35],"costly.":[36],"However,":[37],"it":[38],"relatively":[40],"inexpensive":[41],"collect":[43],"unlabeled":[47,80],"from":[49,79],"that":[51,69,98],"we":[52,62,122,137],"want":[53],"the":[54,76,124,130,145,167,175],"generalize":[57],"to.":[58],"In":[59],"this":[60],"paper,":[61],"propose":[63],"novel":[65],"unsupervised":[66],"adaptation":[67],"method":[68,163],"learns":[70],"synthesize":[72],"target":[77],"domain":[78,146],"in-domain":[81,180],"and":[83,101,106,153,173,179],"out-of-domain":[85,116],"data.":[86,189],"We":[87],"first":[88],"learn":[89],"without":[90,118,185],"supervision":[91],"an":[92],"interpretable":[93],"latent":[94,110,125],"representation":[95],"encodes":[99],"linguistic":[100,131],"nuisance":[102,126],"factors":[103],"(e.g.,":[104],"speaker":[105],"channel)":[107],"using":[108,186],"different":[109],"variables.":[111,132],"To":[112,133],"transform":[113,123],"utterance":[117],"altering":[119],"its":[120],"transcript,":[121],"variables":[127],"while":[128],"maintaining":[129],"demonstrate":[134],"our":[135],"approach,":[136],"focus":[138],"on":[139,166],"channel":[141],"mismatch":[142],"setting,":[143],"where":[144],"interest":[148],"distant":[150],"conversational":[151],"speech,":[152],"labels":[154],"are":[155],"only":[156],"available":[157],"close-talking":[159],"speech.":[160],"Our":[161],"proposed":[162],"evaluated":[165],"AMI":[168],"dataset,":[169],"outperforming":[170],"all":[171],"baselines":[172],"bridging":[174],"gap":[176],"between":[177],"unadapted":[178],"by":[182],"over":[183],"77%":[184],"any":[187],"parallel":[188]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2018,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
