{"id":"https://openalex.org/W2032942114","doi":"https://doi.org/10.1145/1322391.1322394","title":"Morph-based speech recognition and modeling of out-of-vocabulary words across languages","display_name":"Morph-based speech recognition and modeling of out-of-vocabulary words across languages","publication_year":2007,"publication_date":"2007-12-01","ids":{"openalex":"https://openalex.org/W2032942114","doi":"https://doi.org/10.1145/1322391.1322394","mag":"2032942114"},"language":"en","primary_location":{"id":"doi:10.1145/1322391.1322394","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1322391.1322394","pdf_url":null,"source":{"id":"https://openalex.org/S200945739","display_name":"ACM Transactions on Speech and Language Processing","issn_l":"1550-4875","issn":["1550-4875","1550-4883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Speech and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061159001","display_name":"Mathias Creutz","orcid":"https://orcid.org/0000-0003-1862-4172"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mathias Creutz","raw_affiliation_strings":["Helsinki University of Technology, TKK, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Helsinki University of Technology, TKK, Finland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085765257","display_name":"Teemu Hirsim\u00e4ki","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Teemu Hirsim\u00e4ki","raw_affiliation_strings":["Helsinki University of Technology, TKK, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Helsinki University of Technology, TKK, Finland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043424064","display_name":"Mikko Kurimo","orcid":"https://orcid.org/0000-0001-5278-7974"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mikko Kurimo","raw_affiliation_strings":["Helsinki University of Technology, TKK, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Helsinki University of Technology, TKK, Finland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089859626","display_name":"Antti Puurula","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Antti Puurula","raw_affiliation_strings":["Helsinki University of Technology, TKK, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Helsinki University of Technology, TKK, Finland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046171140","display_name":"Janne Pylkk\u00f6nen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Janne Pylkk\u00f6nen","raw_affiliation_strings":["Helsinki University of Technology, TKK, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Helsinki University of Technology, TKK, Finland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040694307","display_name":"Vesa Siivola","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vesa Siivola","raw_affiliation_strings":["Helsinki University of Technology, TKK, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Helsinki University of Technology, TKK, Finland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030484123","display_name":"Matti Varjokallio","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matti Varjokallio","raw_affiliation_strings":["Helsinki University of Technology, TKK, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Helsinki University of Technology, TKK, Finland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032284977","display_name":"Ebru Ar\u0131soy","orcid":null},"institutions":[{"id":"https://openalex.org/I4405392","display_name":"Bo\u011fazi\u00e7i University","ror":"https://ror.org/03z9tma90","country_code":"TR","type":"education","lineage":["https://openalex.org/I4405392"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Ebru Arisoy","raw_affiliation_strings":["Bo\u01e7azi\u00e7i University, Istanbul"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bo\u01e7azi\u00e7i University, Istanbul","institution_ids":["https://openalex.org/I4405392"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055086464","display_name":"Murat Sara\u00e7lar","orcid":"https://orcid.org/0000-0002-7435-8510"},"institutions":[{"id":"https://openalex.org/I4405392","display_name":"Bo\u011fazi\u00e7i University","ror":"https://ror.org/03z9tma90","country_code":"TR","type":"education","lineage":["https://openalex.org/I4405392"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Murat Sara\u00e7lar","raw_affiliation_strings":["Bo\u01e7azi\u00e7i University, Istanbul"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bo\u01e7azi\u00e7i University, Istanbul","institution_ids":["https://openalex.org/I4405392"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060979948","display_name":"Andreas Stolcke","orcid":"https://orcid.org/0000-0002-9925-905X"},"institutions":[{"id":"https://openalex.org/I1297971548","display_name":"International Computer Science Institute","ror":"https://ror.org/01ewh7m12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1297971548"]},{"id":"https://openalex.org/I1298353152","display_name":"SRI International","ror":"https://ror.org/05s570m15","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1298353152"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andreas Stolcke","raw_affiliation_strings":["SRI International, Menlo Park International Computer Science Institute, Berkeley","SRI International, Menlo Park International Computer Science Institute, Berkeley#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SRI International, Menlo Park International Computer Science Institute, Berkeley","institution_ids":["https://openalex.org/I1298353152","https://openalex.org/I1297971548"]},{"raw_affiliation_string":"SRI International, Menlo Park International Computer Science Institute, Berkeley#TAB#","institution_ids":["https://openalex.org/I1297971548"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5061159001"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":13.0737,"has_fulltext":false,"cited_by_count":127,"citation_normalized_percentile":{"value":0.98652862,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"5","issue":"1","first_page":"1","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8313825130462646},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.733767032623291},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6953538060188293},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6788477897644043},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6502493023872375},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6428486108779907},{"id":"https://openalex.org/keywords/modern-standard-arabic","display_name":"Modern Standard Arabic","score":0.5623435974121094},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.501122236251831},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4864608943462372},{"id":"https://openalex.org/keywords/turkish","display_name":"Turkish","score":0.48278653621673584},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.47493746876716614},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.27445507049560547}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8313825130462646},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.733767032623291},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6953538060188293},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6788477897644043},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6502493023872375},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6428486108779907},{"id":"https://openalex.org/C2778243841","wikidata":"https://www.wikidata.org/wiki/Q56467","display_name":"Modern Standard Arabic","level":3,"score":0.5623435974121094},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.501122236251831},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4864608943462372},{"id":"https://openalex.org/C2781121862","wikidata":"https://www.wikidata.org/wiki/Q256","display_name":"Turkish","level":2,"score":0.48278653621673584},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.47493746876716614},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.27445507049560547},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1322391.1322394","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1322391.1322394","pdf_url":null,"source":{"id":"https://openalex.org/S200945739","display_name":"ACM Transactions on Speech and Language Processing","issn_l":"1550-4875","issn":["1550-4875","1550-4883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Speech and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.157.3581","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.157.3581","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www-speech.sri.com/cgi-bin/run-distill?papers/acm2007-morph-asr.ps.gz","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7699999809265137}],"awards":[{"id":"https://openalex.org/G8361621378","display_name":null,"funder_award_id":"H0011-06-C-0023","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W813000","https://openalex.org/W17921661","https://openalex.org/W23024349","https://openalex.org/W58893626","https://openalex.org/W68797657","https://openalex.org/W77114827","https://openalex.org/W139293362","https://openalex.org/W148400104","https://openalex.org/W154368987","https://openalex.org/W201532657","https://openalex.org/W294504433","https://openalex.org/W1488564527","https://openalex.org/W1501139663","https://openalex.org/W1503913248","https://openalex.org/W1528716106","https://openalex.org/W1530250655","https://openalex.org/W1533169541","https://openalex.org/W1537482007","https://openalex.org/W1631260214","https://openalex.org/W1683106298","https://openalex.org/W1707124376","https://openalex.org/W1797288984","https://openalex.org/W1934041838","https://openalex.org/W1956785191","https://openalex.org/W1970887833","https://openalex.org/W1983311927","https://openalex.org/W2010910318","https://openalex.org/W2031287088","https://openalex.org/W2042783153","https://openalex.org/W2050938027","https://openalex.org/W2053306448","https://openalex.org/W2056250865","https://openalex.org/W2069712814","https://openalex.org/W2070554026","https://openalex.org/W2074546930","https://openalex.org/W2098439409","https://openalex.org/W2100373303","https://openalex.org/W2101711363","https://openalex.org/W2103589071","https://openalex.org/W2117621558","https://openalex.org/W2122228338","https://openalex.org/W2141684702","https://openalex.org/W2142222482","https://openalex.org/W2150144720","https://openalex.org/W2158195707","https://openalex.org/W2162160325","https://openalex.org/W2304025599","https://openalex.org/W2573934436","https://openalex.org/W2596600410","https://openalex.org/W2603415636","https://openalex.org/W2725592455","https://openalex.org/W2728904056","https://openalex.org/W2915722758","https://openalex.org/W2952343510","https://openalex.org/W3036063182","https://openalex.org/W3143835353","https://openalex.org/W3183153947","https://openalex.org/W4251556668","https://openalex.org/W6631963490","https://openalex.org/W6636811518","https://openalex.org/W6638218882"],"related_works":["https://openalex.org/W2250909759","https://openalex.org/W2532616038","https://openalex.org/W2624072012","https://openalex.org/W2787311093","https://openalex.org/W2620283452","https://openalex.org/W2057384730","https://openalex.org/W4307474317","https://openalex.org/W2008468404","https://openalex.org/W2147879411","https://openalex.org/W4322750817"],"abstract_inverted_index":{"We":[0],"explore":[1],"the":[2,38,54,57,80,96,107,117,122,127,132,135],"use":[3],"of":[4,49,52,56,137],"morph-based":[5],"language":[6,45,58],"models":[7,46,72,82,98],"in":[8,32,131],"large-vocabulary":[9],"continuous-speech":[10],"recognition":[11,108],"systems":[12],"across":[13],"four":[14],"so-called":[15],"morphologically":[16],"rich":[17],"languages:":[18],"Finnish,":[19],"Estonian,":[20],"Turkish,":[21],"and":[22,66,134],"Egyptian":[23],"Colloquial":[24],"Arabic.":[25],"The":[26,113],"morphs":[27,50],"are":[28,139],"subword":[29],"units":[30],"discovered":[31],"an":[33],"unsupervised,":[34],"data-driven":[35],"way":[36],"using":[37],"Morfessor":[39],"algorithm.":[40],"By":[41],"estimating":[42],"n":[43],"-gram":[44],"over":[47],"sequences":[48],"instead":[51],"words,":[53],"quality":[55],"model":[59,125],"is":[60,93],"improved":[61],"through":[62],"better":[63],"vocabulary":[64],"coverage":[65],"reduced":[67],"data":[68,138],"sparsity.":[69],"Standard":[70],"word":[71,87,124],"suffer":[73],"from":[74],"high":[75],"out-of-vocabulary":[76],"(OOV)":[77],"rates,":[78],"whereas":[79],"morph":[81,97,128],"can":[83],"recognize":[84],"previously":[85],"unseen":[86],"forms":[88],"by":[89],"concatenating":[90],"morphs.":[91],"It":[92],"shown":[94],"that":[95],"do":[99],"perform":[100],"fairly":[101],"well":[102],"on":[103,110],"OOVs":[104],"without":[105],"compromising":[106],"accuracy":[109],"in-vocabulary":[111],"words.":[112],"Arabic":[114],"experiment":[115],"constitutes":[116],"only":[118],"exception":[119],"since":[120],"here":[121],"standard":[123],"outperforms":[126],"model.":[129],"Differences":[130],"datasets":[133],"amount":[136],"discussed":[140],"as":[141],"a":[142],"plausible":[143],"explanation.":[144]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":11},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":11},{"year":2016,"cited_by_count":9},{"year":2015,"cited_by_count":11},{"year":2014,"cited_by_count":7},{"year":2013,"cited_by_count":11},{"year":2012,"cited_by_count":7}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
