{"id":"https://openalex.org/W2037164444","doi":"https://doi.org/10.1142/s0219427905001286","title":"Chinese Unknown Word Identification Based on Local Bigram Model","display_name":"Chinese Unknown Word Identification Based on Local Bigram Model","publication_year":2005,"publication_date":"2005-09-01","ids":{"openalex":"https://openalex.org/W2037164444","doi":"https://doi.org/10.1142/s0219427905001286","mag":"2037164444"},"language":"en","primary_location":{"id":"doi:10.1142/s0219427905001286","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219427905001286","pdf_url":null,"source":{"id":"https://openalex.org/S50006202","display_name":"International Journal of Computer Processing Of Languages","issn_l":"1793-8406","issn":["1793-8406","2010-0205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Processing of Languages","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100741930","display_name":"Zhuoran Wang","orcid":"https://orcid.org/0000-0001-7218-4046"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"ZHUORAN WANG","raw_affiliation_strings":["School of Mathematical & Computer Sciences"],"affiliations":[{"raw_affiliation_string":"School of Mathematical & Computer Sciences","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100418120","display_name":"Ting Liu","orcid":"https://orcid.org/0000-0001-8904-8796"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"TING LIU","raw_affiliation_strings":["Harbin Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100741930"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.11357787,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"18","issue":"03","first_page":"185","last_page":"196"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bigram","display_name":"Bigram","score":0.9760580062866211},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.852291464805603},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6601852178573608},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6226571798324585},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5972362756729126},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5859570503234863},{"id":"https://openalex.org/keywords/word-identification","display_name":"Word identification","score":0.5705395340919495},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.42891746759414673},{"id":"https://openalex.org/keywords/word-recognition","display_name":"Word recognition","score":0.17322388291358948},{"id":"https://openalex.org/keywords/trigram","display_name":"Trigram","score":0.14026790857315063},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10623243451118469}],"concepts":[{"id":"https://openalex.org/C108757681","wikidata":"https://www.wikidata.org/wiki/Q2773912","display_name":"Bigram","level":3,"score":0.9760580062866211},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.852291464805603},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6601852178573608},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6226571798324585},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5972362756729126},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5859570503234863},{"id":"https://openalex.org/C2994428975","wikidata":"https://www.wikidata.org/wiki/Q8034367","display_name":"Word identification","level":4,"score":0.5705395340919495},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.42891746759414673},{"id":"https://openalex.org/C150856459","wikidata":"https://www.wikidata.org/wiki/Q8034367","display_name":"Word recognition","level":3,"score":0.17322388291358948},{"id":"https://openalex.org/C137546455","wikidata":"https://www.wikidata.org/wiki/Q3213474","display_name":"Trigram","level":2,"score":0.14026790857315063},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10623243451118469},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1142/s0219427905001286","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219427905001286","pdf_url":null,"source":{"id":"https://openalex.org/S50006202","display_name":"International Journal of Computer Processing Of Languages","issn_l":"1793-8406","issn":["1793-8406","2010-0205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Processing of Languages","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.83.5671","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.83.5671","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://ir.hit.edu.cn/ir_papers/Vol_3/Chinese Unknown Word Identification Based on Local Bigram Model.pdf","raw_type":"text"},{"id":"pmh:oai:eprints.soton.ac.uk:261543","is_oa":false,"landing_page_url":"https://eprints.soton.ac.uk/261543/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401019","display_name":"ePrints Soton (University of Southampton)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I43439940","host_organization_name":"University of Southampton","host_organization_lineage":["https://openalex.org/I43439940"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W64028706","https://openalex.org/W129714699","https://openalex.org/W287031571","https://openalex.org/W1861196058","https://openalex.org/W1967541735","https://openalex.org/W2033295622","https://openalex.org/W2083979457","https://openalex.org/W2116317530","https://openalex.org/W2117400858","https://openalex.org/W2118936156","https://openalex.org/W2785940127","https://openalex.org/W4302400390"],"related_works":["https://openalex.org/W2111465144","https://openalex.org/W2185649612","https://openalex.org/W1006721676","https://openalex.org/W2156230116","https://openalex.org/W2124470186","https://openalex.org/W2269447762","https://openalex.org/W1992616322","https://openalex.org/W2036873286","https://openalex.org/W2197825247","https://openalex.org/W4283746496"],"abstract_inverted_index":{"The":[0,95],"paper":[1],"presents":[2],"a":[3,11,21,40,54,66],"Chinese":[4],"unknown":[5,29],"word":[6,17],"identification":[7],"system":[8,19],"based":[9],"on":[10],"local":[12],"bigram":[13,41],"model.":[14,24],"Generally,":[15],"our":[16,98],"segmentation":[18],"employs":[20],"statistical-based":[22],"unigram":[23],"But":[25],"to":[26],"identify":[27],"those":[28],"words,":[30],"we":[31,57],"take":[32],"advantage":[33],"of":[34,48,68,81,97],"their":[35],"contextual":[36],"information":[37],"and":[38,87],"apply":[39],"model":[42],"locally.":[43],"By":[44],"adjusting":[45],"the":[46,79,101],"value":[47],"interpolation":[49],"which":[50],"is":[51,72,84,103],"derived":[52],"from":[53],"smoothing":[55],"method,":[56],"combine":[58],"these":[59],"two":[60],"models":[61],"with":[62],"different":[63],"dimensions.":[64],"As":[65],"simplification":[67],"bigram,":[69],"this":[70],"method":[71],"simple":[73],"as":[74,76],"well":[75],"feasible,":[77],"since":[78],"complexity":[80],"its":[82],"algorithm":[83],"quite":[85],"low":[86],"not":[88],"so":[89],"many":[90],"training":[91],"corpora":[92],"are":[93],"needed.":[94],"results":[96],"experiments":[99],"show":[100],"solution":[102],"effective.":[104]},"counts_by_year":[{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
