{"id":"https://openalex.org/W3139170180","doi":"https://doi.org/10.1109/bigdata50022.2020.9377762","title":"An Empirical Study on Efficiency of a Dictionary Based Viterbi Algorithm for Word Segmentation","display_name":"An Empirical Study on Efficiency of a Dictionary Based Viterbi Algorithm for Word Segmentation","publication_year":2020,"publication_date":"2020-12-10","ids":{"openalex":"https://openalex.org/W3139170180","doi":"https://doi.org/10.1109/bigdata50022.2020.9377762","mag":"3139170180"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata50022.2020.9377762","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9377762","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102143121","display_name":"Sudhir Aggarwal","orcid":null},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sudhir Aggarwal","raw_affiliation_strings":["Florida State University, Tallahassee, FL, USA"],"affiliations":[{"raw_affiliation_string":"Florida State University, Tallahassee, FL, USA","institution_ids":["https://openalex.org/I103163165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072996215","display_name":"Shiva Houshmand","orcid":"https://orcid.org/0000-0003-0779-3595"},"institutions":[{"id":"https://openalex.org/I16269868","display_name":"Santa Clara University","ror":"https://ror.org/03ypqe447","country_code":"US","type":"education","lineage":["https://openalex.org/I16269868"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shiva Houshmand","raw_affiliation_strings":["Santa Clara University, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"Santa Clara University, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I16269868"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047462138","display_name":"Tathagata Mukherjee","orcid":"https://orcid.org/0000-0001-8753-5718"},"institutions":[{"id":"https://openalex.org/I82495205","display_name":"University of Alabama in Huntsville","ror":"https://ror.org/02zsxwr40","country_code":"US","type":"education","lineage":["https://openalex.org/I82495205"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tathagata Mukherjee","raw_affiliation_strings":["The University of Alabama in Huntsville, Huntsville, AL, USA"],"affiliations":[{"raw_affiliation_string":"The University of Alabama in Huntsville, Huntsville, AL, USA","institution_ids":["https://openalex.org/I82495205"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015685665","display_name":"James Challis Parsons","orcid":null},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Parsons","raw_affiliation_strings":["Florida State University, Tallahassee, FL, USA"],"affiliations":[{"raw_affiliation_string":"Florida State University, Tallahassee, FL, USA","institution_ids":["https://openalex.org/I103163165"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102143121"],"corresponding_institution_ids":["https://openalex.org/I103163165"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.21729027,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"3","issue":null,"first_page":"3702","last_page":"3710"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7463547587394714},{"id":"https://openalex.org/keywords/viterbi-algorithm","display_name":"Viterbi algorithm","score":0.69849693775177},{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.6785430908203125},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6194948554039001},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5862438678741455},{"id":"https://openalex.org/keywords/pointer","display_name":"Pointer (user interface)","score":0.5013232231140137},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.47099679708480835},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.46426936984062195},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.449300616979599},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.44674715399742126},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.44430315494537354},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3378087282180786},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1961781084537506},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.09918063879013062}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7463547587394714},{"id":"https://openalex.org/C60582962","wikidata":"https://www.wikidata.org/wiki/Q83886","display_name":"Viterbi algorithm","level":3,"score":0.69849693775177},{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.6785430908203125},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6194948554039001},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5862438678741455},{"id":"https://openalex.org/C150202949","wikidata":"https://www.wikidata.org/wiki/Q107602","display_name":"Pointer (user interface)","level":2,"score":0.5013232231140137},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.47099679708480835},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.46426936984062195},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.449300616979599},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.44674715399742126},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.44430315494537354},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3378087282180786},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1961781084537506},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.09918063879013062},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata50022.2020.9377762","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9377762","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4699999988079071,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1991133427","https://openalex.org/W1992696103","https://openalex.org/W2019578814","https://openalex.org/W2095880437","https://openalex.org/W2106279089","https://openalex.org/W2158551114","https://openalex.org/W2161952424","https://openalex.org/W2163074454","https://openalex.org/W2886313391","https://openalex.org/W2902964203","https://openalex.org/W2980449122","https://openalex.org/W3149336535","https://openalex.org/W4237453526","https://openalex.org/W4250071748","https://openalex.org/W6676070474","https://openalex.org/W6793306531","https://openalex.org/W6832045124"],"related_works":["https://openalex.org/W1874815355","https://openalex.org/W4229844547","https://openalex.org/W4243828764","https://openalex.org/W4254523142","https://openalex.org/W2391392917","https://openalex.org/W3095240633","https://openalex.org/W75168880","https://openalex.org/W2464437250","https://openalex.org/W56552447","https://openalex.org/W2947398561"],"abstract_inverted_index":{"In":[0],"this":[1,30],"paper":[2],"we":[3,217,279],"present":[4],"an":[5,84,137],"algorithm":[6,36,45,220,249,283],"for":[7,37,102,122,173,272],"segmenting":[8],"English":[9,50],"sentences,":[10],"without":[11],"spaces,":[12],"into":[13,74],"their":[14],"constituent":[15,76],"words":[16,51,77,172,243],"based":[17,34],"on":[18,80,231],"a":[19,22,47,89,119,165,196,257],"dictionary":[20,33],"using":[21],"variation":[23,31],"of":[24,49,62,71,83,97,113,136,141,143,149,171,180,186,268,275],"the":[25,32,44,58,63,72,75,81,94,103,110,114,124,129,147,153,162,168,174,181,190,200,205,207,214,221,235,269,273],"Viterbi":[26,35],"algorithm.":[27,226],"We":[28,227,251],"call":[29,218],"word":[38,66,100,126,276],"boundary":[39],"identification.":[40],"The":[41,68,132,155,184],"input":[42,73,115,150,182],"to":[43,88,146,167,204,244],"is":[46,60,78,193,211,266],"sequence":[48,101],"which":[52,92,265],"are":[53],"not":[54],"space":[55],"separated":[56],"and":[57,117,288],"output":[59],"list":[61,140,170,185],"top-k":[64,69],"possible":[65,99,125,187],"segmentations.":[67],"segmentation":[70],"determined":[79],"basis":[82],"optimality":[85],"criteria":[86],"applied":[87],"scoring":[90],"function":[91],"computes":[93],"\"segmentation":[95],"score\"":[96],"each":[98],"given":[104],"input.":[105],"Our":[106],"implementation":[107,255,260],"works":[108],"from":[109],"back":[111],"(right)":[112],"string":[116],"creates":[118],"data":[120,133,201,209],"structure":[121,134,210],"storing":[123],"boundaries":[127],"during":[128,195,213],"first":[130,215],"pass.":[131],"consists":[135],"index":[138,163],"(a":[139],"nodes)":[142],"length":[144],"equal":[145],"number":[148],"characters":[151],"in":[152,161,285],"string.":[154],"i":[156,179],"<sup":[157],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[158],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">th</sup>":[159],"node":[160],"contains":[164],"pointer":[166],"optimal":[169,191],"substring":[175],"starting":[176],"at":[177],"position":[178],"sequence.":[183],"breakdowns":[188],"including":[189,234],"one":[192,241,267],"collected":[194],"second":[197],"pass":[198,216],"through":[199],"structure.":[202],"Due":[203],"way":[206],"initial":[208],"built":[212],"our":[219,248,254,282],"Reverse":[222],"Sequence":[223],"Search":[224],"(RSS)":[225],"report":[228],"experimental":[229],"results":[230],"several":[232],"datasets":[233],"entire":[236],"Brown":[237],"corpus":[238],"with":[239,256],"over":[240],"million":[242],"show":[245,280],"how":[246],"fast":[247],"works.":[250],"also":[252],"compare":[253],"dynamic":[258],"programming":[259],"provided":[261],"by":[262],"Norvig":[263],"[1]":[264],"classical":[270],"algorithms":[271],"problem":[274],"segmentation.":[277],"Furthermore":[278],"that":[281],"runs":[284],"O(n)":[286],"time":[287],"space.":[289]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
