{"id":"https://openalex.org/W2342291550","doi":"https://doi.org/10.1109/icassp.2016.7472688","title":"Speaker cluster-based speaker adaptive training for deep neural network acoustic modeling","display_name":"Speaker cluster-based speaker adaptive training for deep neural network acoustic modeling","publication_year":2016,"publication_date":"2016-03-01","ids":{"openalex":"https://openalex.org/W2342291550","doi":"https://doi.org/10.1109/icassp.2016.7472688","mag":"2342291550"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2016.7472688","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472688","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1604.06113","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101906116","display_name":"Wei Chu","orcid":"https://orcid.org/0000-0002-4595-388X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wei Chu","raw_affiliation_strings":["Sony Computer Entertainment America, San Mateo, CA"],"affiliations":[{"raw_affiliation_string":"Sony Computer Entertainment America, San Mateo, CA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081135618","display_name":"Ruxin Chen","orcid":"https://orcid.org/0009-0000-0624-4127"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruxin Chen","raw_affiliation_strings":["Sony Computer Entertainment America, San Mateo, CA"],"affiliations":[{"raw_affiliation_string":"Sony Computer Entertainment America, San Mateo, CA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101906116"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01842015,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5295","last_page":"5299"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7806670665740967},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6690512895584106},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.6423844695091248},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6326051354408264},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5767563581466675},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5755509734153748},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.5114218592643738},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.4766070246696472},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36091989278793335},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.11600705981254578}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7806670665740967},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6690512895584106},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.6423844695091248},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6326051354408264},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5767563581466675},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5755509734153748},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.5114218592643738},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.4766070246696472},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36091989278793335},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.11600705981254578},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp.2016.7472688","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472688","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1604.06113","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1604.06113","pdf_url":"https://arxiv.org/pdf/1604.06113","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1604.06113","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1604.06113","pdf_url":"https://arxiv.org/pdf/1604.06113","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.7900000214576721,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W82936479","https://openalex.org/W165923638","https://openalex.org/W1221962394","https://openalex.org/W1513820424","https://openalex.org/W1524333225","https://openalex.org/W1599512239","https://openalex.org/W2002342963","https://openalex.org/W2015633636","https://openalex.org/W2026369565","https://openalex.org/W2056825827","https://openalex.org/W2069631319","https://openalex.org/W2087006792","https://openalex.org/W2090320273","https://openalex.org/W2094147890","https://openalex.org/W2294543795","https://openalex.org/W2396230943","https://openalex.org/W2402704384","https://openalex.org/W2403797310","https://openalex.org/W4302557958","https://openalex.org/W6631362777","https://openalex.org/W6696912008","https://openalex.org/W6712100661","https://openalex.org/W6713297655","https://openalex.org/W6982185968"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W66821593","https://openalex.org/W1521299571","https://openalex.org/W2162158162","https://openalex.org/W4235705411","https://openalex.org/W4247736853","https://openalex.org/W1493012537","https://openalex.org/W1999004162","https://openalex.org/W2144470400","https://openalex.org/W2911612049"],"abstract_inverted_index":{"A":[0],"speaker":[1,3,42,59,68,82,131],"cluster-based":[2],"adaptive":[4],"training":[5,111],"(SAT)":[6],"method":[7,98],"under":[8],"deep":[9],"neural":[10],"network-hidden":[11],"Markov":[12],"model":[13],"(DNN-HMM)":[14],"framework":[15],"is":[16,63,84,107,147],"presented":[17],"in":[18,60,145],"this":[19],"paper.":[20],"During":[21],"training,":[22],"speakers":[23,124],"that":[24],"are":[25,31,45],"acoustically":[26],"adjacent":[27],"to":[28,65,129],"each":[29,50],"other":[30],"hierarchically":[32],"clustered":[33],"using":[34],"an":[35,57],"i-vector":[36,72],"based":[37,73],"distance":[38],"metric.":[39],"DNNs":[40],"with":[41,125,134],"dependent":[43],"layers":[44],"then":[46],"adaptively":[47],"trained":[48,77],"for":[49,86],"cluster":[51,69,83],"of":[52,79,89,95,113,116,122,139],"speakers.":[53],"Before":[54],"decoding":[55,87],"starts,":[56],"unseen":[58],"test":[61,91,120],"set":[62,112,121],"matched":[64,81],"the":[66,80,90,96,150],"closest":[67],"through":[70],"comparing":[71],"distances.":[74],"The":[75,93],"previously":[76],"DNN":[78,133],"used":[85],"utterances":[88],"speaker.":[92],"performance":[94,146],"proposed":[97,151],"on":[99,109],"a":[100,110,119,130,135,141],"large":[101],"vocabulary":[102],"spontaneous":[103],"speech":[104],"recognition":[105],"task":[106],"evaluated":[108],"1500":[114],"hours":[115],"speech,":[117],"and":[118],"24":[123],"1774":[126],"utterances.":[127],"Compared":[128],"independent":[132],"word":[136],"error":[137],"rate":[138],"11.6%,":[140],"relative":[142],"6.8%":[143],"improvement":[144],"obtained":[148],"from":[149],"method.":[152]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
