{"id":"https://openalex.org/W2048350760","doi":"https://doi.org/10.1142/s0219427900000041","title":"Unknown Word and Phrase Extraction Using a Phrase-Like-Unit-Based Likelihood Ratio","display_name":"Unknown Word and Phrase Extraction Using a Phrase-Like-Unit-Based Likelihood Ratio","publication_year":2000,"publication_date":"2000-03-01","ids":{"openalex":"https://openalex.org/W2048350760","doi":"https://doi.org/10.1142/s0219427900000041","mag":"2048350760"},"language":"en","primary_location":{"id":"doi:10.1142/s0219427900000041","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219427900000041","pdf_url":null,"source":{"id":"https://openalex.org/S50006202","display_name":"International Journal of Computer Processing Of Languages","issn_l":"1793-8406","issn":["1793-8406","2010-0205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Processing of Languages","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077944593","display_name":"Yu\u2010Sheng Lai","orcid":"https://orcid.org/0000-0003-1344-0817"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"YU-SHENG LAI","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5103251327","display_name":"Chung\u2010Hsien Wu","orcid":"https://orcid.org/0000-0002-3947-2123"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"CHUNG-HSIEN WU","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.1858,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.9216147,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":"01","first_page":"83","last_page":"95"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9836999773979187,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9782999753952026,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.8102132081985474},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.762231707572937},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6464719772338867},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6224508285522461},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5796913504600525},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1648178994655609}],"concepts":[{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.8102132081985474},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.762231707572937},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6464719772338867},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6224508285522461},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5796913504600525},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1648178994655609},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s0219427900000041","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219427900000041","pdf_url":null,"source":{"id":"https://openalex.org/S50006202","display_name":"International Journal of Computer Processing Of Languages","issn_l":"1793-8406","issn":["1793-8406","2010-0205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Processing of Languages","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6600000262260437,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321040","display_name":"National Science Council","ror":"https://ror.org/02kv4zf79"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W84324031","https://openalex.org/W2061271742","https://openalex.org/W2785508703"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2039546652","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2012262991","https://openalex.org/W2376932109","https://openalex.org/W2373794620","https://openalex.org/W3192589309","https://openalex.org/W2970166416"],"abstract_inverted_index":{"In":[0],"this":[1,95],"paper,":[2],"we":[3],"propose":[4],"a":[5,17,115,119,188],"statistical":[6],"method":[7,76,96,175],"to":[8,54,67,101,117,149],"extract":[9,55],"unknown":[10,21,71,81,178],"words":[11,22,38,72,82,179],"and":[12,62,99,143,180,183],"phrases":[13,24,84,181],"from":[14,91,107,133,185],"sentences":[15,124,132,186],"in":[16,39,125,187],"specific":[18,189],"domain.":[19,190],"The":[20,75,140,169],"or":[23,42,73,83],"are":[25,65,147],"defined":[26],"as":[27,136],"phrase-like":[28],"units":[29],"(PLU)":[30],"that":[31,45,173],"can":[32,79,87,176],"be":[33],"the":[34,40,69,89,126,134,137,144,155,158,164,174],"combinations":[35],"of":[36],"some":[37,43],"lexicon":[41],"characters":[44],"appear":[46],"together.":[47],"A":[48],"PLU-based":[49],"likelihood":[50],"ratio":[51],"is":[52,97],"proposed":[53,152],"possible":[56],"PLUs.":[57],"Two":[58],"principles,":[59],"overlap":[60],"competition":[61],"inclusion":[63],"competition,":[64],"used":[66,148],"decide":[68],"final":[70],"phrases.":[74],"not":[77],"only":[78],"detect":[80,177],"but":[85],"also":[86],"correct":[88],"errors":[90],"word":[92],"segmentation.":[93],"Additionally,":[94],"expandable":[98],"portable":[100],"other":[102],"domains.":[103],"We":[104,128],"collected":[105],"articles":[106],"MSDN":[108],"(Min":[109],"Sheng":[110],"Daily":[111],"News)":[112],"over":[113,122],"half":[114],"year":[116],"construct":[118],"corpus":[120,135],"containing":[121],"275,000":[123],"corpus.":[127,139],"randomly":[129],"choose":[130],"175,000":[131],"experimental":[138,170],"recall":[141,159],"rate":[142,146,160,166],"precision":[145,165],"evaluate":[150],"our":[151],"method.":[153],"Using":[154],"extraction":[156],"method,":[157],"achieved":[161,167],"88.7%":[162],"while":[163],"88.2%.":[168],"results":[171],"show":[172],"automatically":[182],"efficiently":[184]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
