{"id":"https://openalex.org/W2119276288","doi":"https://doi.org/10.1017/s1351324902002838","title":"Word clustering and disambiguation based on co-occurrence data","display_name":"Word clustering and disambiguation based on co-occurrence data","publication_year":2002,"publication_date":"2002-03-01","ids":{"openalex":"https://openalex.org/W2119276288","doi":"https://doi.org/10.1017/s1351324902002838","mag":"2119276288"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324902002838","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324902002838","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100455129","display_name":"Hang Li","orcid":"https://orcid.org/0000-0002-1230-4007"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]},{"id":"https://openalex.org/I118347220","display_name":"NEC (Japan)","ror":"https://ror.org/04jndar25","country_code":"JP","type":"company","lineage":["https://openalex.org/I118347220"]}],"countries":["CN","JP"],"is_corresponding":true,"raw_author_name":"HANG LI","raw_affiliation_strings":["Current Address: Hang Li, Microsoft Research Asia, 5F Sigma Center, No. 49 Zhichun Road Haidian District, Beijing, China 100080. Email:","Theory NEC Laboratory, Real World Computing Partnership, c/o Internet Systems Research Laboratories, NEC Corporation, 4-1-1 Miyazaki, Miyamae-ku, Kawasaki 216-8555, Japan; e-mail:","Theory NEC Laboratory, Real World Computing Partnership, c/o Internet Systems Research Laboratories, NEC Corporation, 4-1-1 Miyazaki, Miyamae-ku, Kawasaki 216-8555, Japan"],"affiliations":[{"raw_affiliation_string":"Current Address: Hang Li, Microsoft Research Asia, 5F Sigma Center, No. 49 Zhichun Road Haidian District, Beijing, China 100080. Email:","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Theory NEC Laboratory, Real World Computing Partnership, c/o Internet Systems Research Laboratories, NEC Corporation, 4-1-1 Miyazaki, Miyamae-ku, Kawasaki 216-8555, Japan; e-mail:","institution_ids":["https://openalex.org/I118347220"]},{"raw_affiliation_string":"Theory NEC Laboratory, Real World Computing Partnership, c/o Internet Systems Research Laboratories, NEC Corporation, 4-1-1 Miyazaki, Miyamae-ku, Kawasaki 216-8555, Japan","institution_ids":["https://openalex.org/I118347220"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5100455129"],"corresponding_institution_ids":["https://openalex.org/I118347220","https://openalex.org/I4210113369"],"apc_list":null,"apc_paid":null,"fwci":2.9183,"has_fulltext":false,"cited_by_count":40,"citation_normalized_percentile":{"value":0.91731736,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"8","issue":"1","first_page":"25","last_page":"42"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8653606176376343},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7578425407409668},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6652750968933105},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6553756594657898},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.554084300994873},{"id":"https://openalex.org/keywords/word-sense-disambiguation","display_name":"Word-sense disambiguation","score":0.5061737895011902},{"id":"https://openalex.org/keywords/thesaurus","display_name":"Thesaurus","score":0.5012145042419434},{"id":"https://openalex.org/keywords/extension","display_name":"Extension (predicate logic)","score":0.4850401282310486},{"id":"https://openalex.org/keywords/joint-probability-distribution","display_name":"Joint probability distribution","score":0.47968506813049316},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4728776812553406},{"id":"https://openalex.org/keywords/probability-distribution","display_name":"Probability distribution","score":0.41527673602104187},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32908445596694946},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12885931134223938},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.07310286164283752}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8653606176376343},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7578425407409668},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6652750968933105},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6553756594657898},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.554084300994873},{"id":"https://openalex.org/C51646954","wikidata":"https://www.wikidata.org/wiki/Q48522","display_name":"Word-sense disambiguation","level":3,"score":0.5061737895011902},{"id":"https://openalex.org/C2778698081","wikidata":"https://www.wikidata.org/wiki/Q179797","display_name":"Thesaurus","level":2,"score":0.5012145042419434},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.4850401282310486},{"id":"https://openalex.org/C18653775","wikidata":"https://www.wikidata.org/wiki/Q1333358","display_name":"Joint probability distribution","level":2,"score":0.47968506813049316},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4728776812553406},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.41527673602104187},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32908445596694946},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12885931134223938},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.07310286164283752},{"id":"https://openalex.org/C157659113","wikidata":"https://www.wikidata.org/wiki/Q533822","display_name":"WordNet","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s1351324902002838","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324902002838","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W71932142","https://openalex.org/W80610024","https://openalex.org/W151200788","https://openalex.org/W1509045587","https://openalex.org/W1516391399","https://openalex.org/W1517749756","https://openalex.org/W1553625635","https://openalex.org/W1626544186","https://openalex.org/W1632114991","https://openalex.org/W1754931551","https://openalex.org/W1964268963","https://openalex.org/W1966812932","https://openalex.org/W1987071340","https://openalex.org/W1994507518","https://openalex.org/W2015042937","https://openalex.org/W2033894198","https://openalex.org/W2038826915","https://openalex.org/W2056957301","https://openalex.org/W2061023833","https://openalex.org/W2076002267","https://openalex.org/W2076800308","https://openalex.org/W2095734615","https://openalex.org/W2095958485","https://openalex.org/W2114886551","https://openalex.org/W2119610041","https://openalex.org/W2121227244","https://openalex.org/W2123084125","https://openalex.org/W2127314673","https://openalex.org/W2163953154","https://openalex.org/W2166776180","https://openalex.org/W2295081780","https://openalex.org/W2949154782","https://openalex.org/W2949549191","https://openalex.org/W2950457025"],"related_works":["https://openalex.org/W2386544544","https://openalex.org/W3195950251","https://openalex.org/W2146816430","https://openalex.org/W2293111794","https://openalex.org/W2479176469","https://openalex.org/W1989375079","https://openalex.org/W1568983069","https://openalex.org/W2970166416","https://openalex.org/W2276331713","https://openalex.org/W2403014995"],"abstract_inverted_index":{"We":[0,25,45,83],"address":[1],"the":[2,21,27,39,52,92,119,123],"problem":[3,29],"of":[4,32,42,71,94,101],"clustering":[5,28,65],"words":[6],"(or":[7],"constructing":[8],"a":[9,34,61,68,86,102],"thesaurus)":[10],"based":[11,50],"on":[12,51],"co-occurrence":[13],"data,":[14],"and":[15,80,99],"conducting":[16],"syntactic":[17,87],"disambiguation":[18,88,107,125],"by":[19,110,122],"using":[20],"acquired":[22],"word":[23,43,97],"classes.":[24],"view":[26],"as":[30],"that":[31,72,100],"estimating":[33,59],"class-based":[35],"probability":[36,62],"distribution":[37],"specifying":[38],"joint":[40],"probabilities":[41],"pairs.":[44],"propose":[46,85],"an":[47],"efficient":[48],"algorithm":[49],"Minimum":[53],"Description":[54],"Length":[55],"(MDL)":[56],"principle":[57],"for":[58],"such":[60],"model.":[63],"Our":[64],"method":[66,89,112],"is":[67,113],"natural":[69],"extension":[70],"proposed":[73],"in":[74],"Brown,":[75],"Della":[76],"Pietra,":[77],"deSouza,":[78],"Lai":[79],"Mercer":[81],"(1992).":[82],"next":[84],"which":[90,115],"combines":[91],"use":[93],"automatically":[95],"constructed":[96],"classes":[98],"hand-made":[103],"thesaurus.":[104],"The":[105],"overall":[106],"accuracy":[108],"achieved":[109],"our":[111],"88.2%,":[114],"compares":[116],"favorably":[117],"against":[118],"accuracies":[120],"obtained":[121],"state-of-the-art":[124],"methods.":[126]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
