{"id":"https://openalex.org/W2262393948","doi":"https://doi.org/10.1017/s1351324915000315","title":"Phonetisaurus: Exploring grapheme-to-phoneme conversion with joint n-gram models in the WFST framework","display_name":"Phonetisaurus: Exploring grapheme-to-phoneme conversion with joint n-gram models in the WFST framework","publication_year":2015,"publication_date":"2015-09-07","ids":{"openalex":"https://openalex.org/W2262393948","doi":"https://doi.org/10.1017/s1351324915000315","mag":"2262393948"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324915000315","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324915000315","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110649023","display_name":"Josef R. Novak","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"JOSEF ROBERT NOVAK","raw_affiliation_strings":["The University of Tokyo, Graduate School of Information Science and Technology, Tokyo, Japan e-mails:"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Graduate School of Information Science and Technology, Tokyo, Japan e-mails:","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041213266","display_name":"Nobuaki Minematsu","orcid":"https://orcid.org/0000-0002-8778-9555"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"NOBUAKI MINEMATSU","raw_affiliation_strings":["The University of Tokyo, Graduate School of Information Science and Technology, Tokyo, Japan e-mails:"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Graduate School of Information Science and Technology, Tokyo, Japan e-mails:","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108197272","display_name":"Keikichi Hirose","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"KEIKICHI HIROSE","raw_affiliation_strings":["The University of Tokyo, Graduate School of Information Science and Technology, Tokyo, Japan e-mails:"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Graduate School of Information Science and Technology, Tokyo, Japan e-mails:","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":8.0131,"has_fulltext":false,"cited_by_count":88,"citation_normalized_percentile":{"value":0.97424998,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"22","issue":"6","first_page":"907","last_page":"938"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/grapheme","display_name":"Grapheme","score":0.9131577014923096},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8889855146408081},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.6615010499954224},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.6390810012817383},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5470642447471619},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.4872087836265564},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4656354486942291},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4365127384662628},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.418739378452301},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33296483755111694},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2873753011226654},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.16570672392845154}],"concepts":[{"id":"https://openalex.org/C2776779415","wikidata":"https://www.wikidata.org/wiki/Q2545446","display_name":"Grapheme","level":3,"score":0.9131577014923096},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8889855146408081},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.6615010499954224},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.6390810012817383},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5470642447471619},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4872087836265564},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4656354486942291},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4365127384662628},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.418739378452301},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33296483755111694},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2873753011226654},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.16570672392845154},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C30080830","wikidata":"https://www.wikidata.org/wiki/Q169917","display_name":"Graphene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s1351324915000315","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324915000315","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W28516297","https://openalex.org/W51288547","https://openalex.org/W67332896","https://openalex.org/W80018330","https://openalex.org/W179707045","https://openalex.org/W179875071","https://openalex.org/W201187342","https://openalex.org/W1529481723","https://openalex.org/W1597368943","https://openalex.org/W1809686770","https://openalex.org/W1934041838","https://openalex.org/W2046932483","https://openalex.org/W2090755665","https://openalex.org/W2097927681","https://openalex.org/W2102443632","https://openalex.org/W2113641473","https://openalex.org/W2118947254","https://openalex.org/W2125528794","https://openalex.org/W2131774270","https://openalex.org/W2132714218","https://openalex.org/W2158195707","https://openalex.org/W2250489405","https://openalex.org/W2251711535","https://openalex.org/W2293770964","https://openalex.org/W2296237247","https://openalex.org/W2397041158","https://openalex.org/W2405860391","https://openalex.org/W2405962136","https://openalex.org/W2474824677","https://openalex.org/W2514828952","https://openalex.org/W2572721812","https://openalex.org/W2603454388","https://openalex.org/W2604799257","https://openalex.org/W2611071497","https://openalex.org/W2626138915","https://openalex.org/W2913278272","https://openalex.org/W2915722758","https://openalex.org/W2916018751","https://openalex.org/W4229907492","https://openalex.org/W4394150413","https://openalex.org/W6630268475","https://openalex.org/W6662227517","https://openalex.org/W6679317074"],"related_works":["https://openalex.org/W2506515307","https://openalex.org/W2060656088","https://openalex.org/W4385893898","https://openalex.org/W2383836440","https://openalex.org/W2610662399","https://openalex.org/W4285757703","https://openalex.org/W2509341624","https://openalex.org/W2262393948","https://openalex.org/W2402737078","https://openalex.org/W2187175741"],"abstract_inverted_index":{"Abstract":[0],"This":[1],"paper":[2,28,62,84,130],"provides":[3],"an":[4,52],"analysis":[5],"of":[6,16,66,90,118,158],"several":[7],"practical":[8,38],"issues":[9,30],"related":[10,31],"to":[11,32,50,70,115,151,154],"the":[12,22,87,119,129,144],"theory":[13],"and":[14,37,58,80,99,109,126],"implementation":[15,124],"Grapheme-to-Phoneme":[17],"(G2P)":[18],"conversion":[19,140],"systems":[20],"utilizing":[21],"Weighted":[23],"Finite-State":[24],"Transducer":[25],"paradigm.":[26],"The":[27,40,61,83,147],"addresses":[29],"system":[33,56],"accuracy,":[34],"training":[35,59,77],"time":[36],"implementation.":[39],"focus":[41],"is":[42,149],"on":[43],"joint":[44,92],"n-gram":[45,110],"models":[46],"which":[47,72,142],"have":[48],"proven":[49],"provide":[51],"excellent":[53],"trade-off":[54],"between":[55,76],"accuracy":[57,79],"complexity.":[60,82],"argues":[63],"in":[64],"favor":[65,73],"simple,":[67],"productive":[68],"approaches":[69],"G2P,":[71],"a":[74,134,155],"balance":[75],"time,":[78],"model":[81],"also":[85],"introduces":[86,131],"first":[88],"instance":[89],"using":[91],"sequence":[93],"RnnLMs":[94,108],"directly":[95],"for":[96],"G2P":[97,139],"conversion,":[98],"achieves":[100],"new":[101],"state-of-the-art":[102],"performance":[103],"via":[104],"ensemble":[105],"methods":[106],"combining":[107],"based":[111],"models.":[112],"In":[113],"addition":[114],"detailed":[116],"descriptions":[117],"approach,":[120],"minor":[121],"yet":[122],"novel":[123],"solutions,":[125],"experimental":[127],"results,":[128],"Phonetisaurus":[132],",":[133],"fully-functional,":[135],"flexible,":[136],"open-source,":[137],"BSD-licensed":[138],"toolkit,":[141],"leverages":[143],"OpenFst":[145],"library.":[146],"work":[148],"intended":[150],"be":[152],"accessible":[153],"broad":[156],"range":[157],"readers.":[159]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":20},{"year":2020,"cited_by_count":14},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":9},{"year":2016,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
