{"id":"https://openalex.org/W2962833129","doi":"https://doi.org/10.21437/interspeech.2016-761","title":"Automatic Pronunciation Generation by Utilizing a Semi-Supervised Deep Neural Networks","display_name":"Automatic Pronunciation Generation by Utilizing a Semi-Supervised Deep Neural Networks","publication_year":2016,"publication_date":"2016-08-28","ids":{"openalex":"https://openalex.org/W2962833129","doi":"https://doi.org/10.21437/interspeech.2016-761","mag":"2962833129"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2016-761","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2016-761","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2016","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101589290","display_name":"Naoya Takahashi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Naoya Takahashi","raw_affiliation_strings":["Sony Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Corporation, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067551398","display_name":"Tofigh Naghibi","orcid":null},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Tofigh Naghibi","raw_affiliation_strings":["Speech Processing Group, ETH Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Speech Processing Group, ETH Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111975528","display_name":"Beat Pfister","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Beat Pfister","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101589290"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4285,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.8196653,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1141","last_page":"1145"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pronunciation","display_name":"Pronunciation","score":0.9335095882415771},{"id":"https://openalex.org/keywords/timit","display_name":"TIMIT","score":0.840978741645813},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8396767377853394},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6782060265541077},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6108742952346802},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.608007550239563},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5071418285369873},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.5034820437431335},{"id":"https://openalex.org/keywords/phonetic-transcription","display_name":"Phonetic transcription","score":0.4945225417613983},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.46090635657310486},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4530359208583832},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.4465067386627197},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4384550452232361},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.38958555459976196},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.3058682084083557},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.2529671788215637},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.06699711084365845}],"concepts":[{"id":"https://openalex.org/C2780844864","wikidata":"https://www.wikidata.org/wiki/Q184377","display_name":"Pronunciation","level":2,"score":0.9335095882415771},{"id":"https://openalex.org/C2778724510","wikidata":"https://www.wikidata.org/wiki/Q7670405","display_name":"TIMIT","level":3,"score":0.840978741645813},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8396767377853394},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6782060265541077},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6108742952346802},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.608007550239563},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5071418285369873},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.5034820437431335},{"id":"https://openalex.org/C2777853878","wikidata":"https://www.wikidata.org/wiki/Q743569","display_name":"Phonetic transcription","level":2,"score":0.4945225417613983},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.46090635657310486},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4530359208583832},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.4465067386627197},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4384550452232361},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.38958555459976196},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.3058682084083557},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.2529671788215637},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.06699711084365845},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2016-761","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2016-761","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2016","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7099999785423279}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W139772320","https://openalex.org/W1165382972","https://openalex.org/W1964917299","https://openalex.org/W1998653683","https://openalex.org/W2050526637","https://openalex.org/W2052998753","https://openalex.org/W2062227835","https://openalex.org/W2091746061","https://openalex.org/W2107988889","https://openalex.org/W2128022332","https://openalex.org/W2134383396","https://openalex.org/W2146603315","https://openalex.org/W2155273149","https://openalex.org/W2158598479","https://openalex.org/W2167655920","https://openalex.org/W2182963628","https://openalex.org/W2184045248","https://openalex.org/W2399016933","https://openalex.org/W2403081842"],"related_works":["https://openalex.org/W3127686677","https://openalex.org/W97919259","https://openalex.org/W2156205765","https://openalex.org/W231741463","https://openalex.org/W4200068392","https://openalex.org/W80423236","https://openalex.org/W2772686614","https://openalex.org/W2036933852","https://openalex.org/W2808291730","https://openalex.org/W2398872909"],"abstract_inverted_index":{"Phonemic":[0],"or":[1],"phonetic":[2],"sub-word":[3,72],"units":[4,73],"are":[5,20],"the":[6,22,63,81,103],"most":[7],"commonly":[8],"used":[9],"atomic":[10],"elements":[11],"to":[12,26,36,66],"represent":[13],"speech":[14,100],"signals":[15],"in":[16],"modern":[17],"ASRs.However":[18],"they":[19],"not":[21],"optimal":[23],"choice":[24],"due":[25],"several":[27],"reasons":[28],"such":[29],"as:":[30],"large":[31],"amount":[32],"of":[33,71,90],"effort":[34],"required":[35],"handcraft":[37],"a":[38,52,69,75,91],"pronunciation":[39,41,54],"dictionary,":[40],"variations,":[42],"human":[43],"mistakes":[44],"and":[45,48,56,74],"under-resourced":[46],"dialects":[47],"languages.Here,":[49],"we":[50],"propose":[51],"data-driven":[53],"estimation":[55],"acoustic":[57],"modeling":[58],"method":[59,83],"which":[60,84],"only":[61],"takes":[62],"orthographic":[64],"transcription":[65],"jointly":[67],"estimate":[68],"set":[70],"reliable":[76],"dictionary.Experimental":[77],"results":[78],"show":[79],"that":[80],"proposed":[82],"is":[85],"based":[86,98],"on":[87,102],"semi-supervised":[88],"training":[89],"deep":[92],"neural":[93],"network":[94],"largely":[95],"outperforms":[96],"phoneme":[97],"continuous":[99],"recognition":[101],"TIMIT":[104],"dataset.":[105]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
