{"id":"https://openalex.org/W2033413759","doi":"https://doi.org/10.1109/asru.2013.6707773","title":"Unsupervised word segmentation from noisy input","display_name":"Unsupervised word segmentation from noisy input","publication_year":2013,"publication_date":"2013-12-01","ids":{"openalex":"https://openalex.org/W2033413759","doi":"https://doi.org/10.1109/asru.2013.6707773","mag":"2033413759"},"language":"en","primary_location":{"id":"doi:10.1109/asru.2013.6707773","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2013.6707773","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE Workshop on Automatic Speech Recognition and Understanding","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006131416","display_name":"Jahn Heymann","orcid":null},"institutions":[{"id":"https://openalex.org/I206945453","display_name":"Paderborn University","ror":"https://ror.org/058kzsd48","country_code":"DE","type":"education","lineage":["https://openalex.org/I206945453"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Jahn Heymann","raw_affiliation_strings":["Department of Communications Engineering, University of Paderborn, Paderborn, Germany","[Department of Communication Engineering, University of Paderborn, Paderborn, Germany]"],"affiliations":[{"raw_affiliation_string":"Department of Communications Engineering, University of Paderborn, Paderborn, Germany","institution_ids":["https://openalex.org/I206945453"]},{"raw_affiliation_string":"[Department of Communication Engineering, University of Paderborn, Paderborn, Germany]","institution_ids":["https://openalex.org/I206945453"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022949022","display_name":"Oliver Walter","orcid":null},"institutions":[{"id":"https://openalex.org/I206945453","display_name":"Paderborn University","ror":"https://ror.org/058kzsd48","country_code":"DE","type":"education","lineage":["https://openalex.org/I206945453"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Oliver Walter","raw_affiliation_strings":["Department of Communications Engineering, University of Paderborn, Paderborn, Germany","[Department of Communication Engineering, University of Paderborn, Paderborn, Germany]"],"affiliations":[{"raw_affiliation_string":"Department of Communications Engineering, University of Paderborn, Paderborn, Germany","institution_ids":["https://openalex.org/I206945453"]},{"raw_affiliation_string":"[Department of Communication Engineering, University of Paderborn, Paderborn, Germany]","institution_ids":["https://openalex.org/I206945453"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082075598","display_name":"Reinhold Haeb\u2010Umbach","orcid":"https://orcid.org/0000-0001-9468-7330"},"institutions":[{"id":"https://openalex.org/I206945453","display_name":"Paderborn University","ror":"https://ror.org/058kzsd48","country_code":"DE","type":"education","lineage":["https://openalex.org/I206945453"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Reinhold Haeb-Umbach","raw_affiliation_strings":["Department of Communications Engineering, University of Paderborn, Paderborn, Germany","[Department of Communication Engineering, University of Paderborn, Paderborn, Germany]"],"affiliations":[{"raw_affiliation_string":"Department of Communications Engineering, University of Paderborn, Paderborn, Germany","institution_ids":["https://openalex.org/I206945453"]},{"raw_affiliation_string":"[Department of Communication Engineering, University of Paderborn, Paderborn, Germany]","institution_ids":["https://openalex.org/I206945453"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113017615","display_name":"Bhiksha Raj","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bhiksha Raj","raw_affiliation_strings":["Carnegie Mellon University, Language Technologies Institute, Pittsburgh, PA, United States","Language Technology Institute, Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Language Technologies Institute, Pittsburgh, PA, United States","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Language Technology Institute, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5006131416"],"corresponding_institution_ids":["https://openalex.org/I206945453"],"apc_list":null,"apc_paid":null,"fwci":5.4383,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.95438739,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"458","last_page":"463"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.807420015335083},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.6555994153022766},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5828766822814941},{"id":"https://openalex.org/keywords/lexicon","display_name":"Lexicon","score":0.580411970615387},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5715410113334656},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5369361042976379},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5356981158256531},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5053029656410217},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.45787450671195984},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4281274080276489},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.40406715869903564},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.37682849168777466},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13433608412742615}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.807420015335083},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.6555994153022766},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5828766822814941},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.580411970615387},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5715410113334656},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5369361042976379},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5356981158256531},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5053029656410217},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.45787450671195984},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4281274080276489},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.40406715869903564},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.37682849168777466},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13433608412742615},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/asru.2013.6707773","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2013.6707773","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE Workshop on Automatic Speech Recognition and Understanding","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.644.8667","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.644.8667","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://nt.uni-paderborn.de/public/pubs/2013/HeWaHaRa_Poster.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.645.6716","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.645.6716","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://nt.uni-paderborn.de/public/pubs/2013/HeWaHaRa13.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W175734176","https://openalex.org/W1574901103","https://openalex.org/W1606268232","https://openalex.org/W2005902041","https://openalex.org/W2009388533","https://openalex.org/W2024490156","https://openalex.org/W2025482506","https://openalex.org/W2055408826","https://openalex.org/W2114347655","https://openalex.org/W2127612600","https://openalex.org/W2140991203","https://openalex.org/W2142390309","https://openalex.org/W2277403007","https://openalex.org/W2289030499","https://openalex.org/W6636285342","https://openalex.org/W6678951767","https://openalex.org/W6694926450","https://openalex.org/W6696711221"],"related_works":["https://openalex.org/W2594897229","https://openalex.org/W2151348424","https://openalex.org/W4221142855","https://openalex.org/W2050138804","https://openalex.org/W2129812225","https://openalex.org/W2944691285","https://openalex.org/W4290708361","https://openalex.org/W26527944","https://openalex.org/W2523799048","https://openalex.org/W2155620340"],"abstract_inverted_index":{"In":[0],"this":[1,104],"paper":[2],"we":[3,87,145],"present":[4],"an":[5,56,92],"algorithm":[6],"for":[7,31,127,133,139],"the":[8,23,32,35,39,47,53,66,69,116,142,159,165],"unsupervised":[9],"segmentation":[10,161],"of":[11,34,49,55,97,118],"a":[12,20,27,61,75,134,147],"character":[13],"or":[14],"phoneme":[15,58],"lattice":[16,21,135],"into":[17],"words.":[18],"Using":[19],"at":[22,115],"input":[24,129,136],"rather":[25],"than":[26],"single":[28,128],"string":[29],"accounts":[30],"uncertainty":[33],"character/phoneme":[36,143],"recognizer":[37,59],"about":[38],"true":[40],"label":[41],"sequence.":[42],"An":[43],"example":[44],"application":[45],"is":[46,72,123,154],"discovery":[48],"lexical":[50],"units":[51],"from":[52,91],"output":[54],"error-prone":[57],"in":[60,141],"zero-resource":[62],"setting,":[63],"where":[64],"neither":[65],"lexicon":[67],"nor":[68],"language":[70,94],"model":[71,95],"known.":[73],"Recently":[74],"Weighted":[76],"Finite":[77],"State":[78],"Transducer":[79],"(WFST)":[80],"based":[81],"approach":[82],"has":[83],"been":[84],"published":[85],"which":[86,153],"show":[88],"to":[89,107,156,164],"suffer":[90],"issue:":[93],"probabilities":[96],"known":[98],"words":[99],"are":[100],"computed":[101],"incorrectly.":[102],"Fixing":[103],"issue":[105],"leads":[106],"greatly":[108],"improved":[109],"precision":[110],"and":[111,137],"recall":[112],"rates,":[113],"however":[114],"cost":[117],"increased":[119],"computational":[120],"complexity.":[121],"It":[122],"therefore":[124],"practical":[125],"only":[126],"strings.":[130],"To":[131],"allow":[132],"thus":[138],"errors":[140],"recognizer,":[144],"propose":[146],"computationally":[148],"efficient":[149],"suboptimal":[150],"two-stage":[151],"approach,":[152],"shown":[155],"significantly":[157],"improve":[158],"word":[160],"performance":[162],"compared":[163],"earlier":[166],"WFST":[167],"approach.":[168]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":5},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
