{"id":"https://openalex.org/W2100322480","doi":"https://doi.org/10.48550/arxiv.cs/0407065","title":"Word Sense Disambiguation by Web Mining for Word Co-occurrence Probabilities","display_name":"Word Sense Disambiguation by Web Mining for Word Co-occurrence Probabilities","publication_year":2004,"publication_date":"2004-07-29","ids":{"openalex":"https://openalex.org/W2100322480","doi":"https://doi.org/10.48550/arxiv.cs/0407065","mag":"2100322480"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:cs/0407065","is_oa":true,"landing_page_url":"http://arxiv.org/abs/cs/0407065","pdf_url":"https://arxiv.org/pdf/cs/0407065","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/cs/0407065","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027819738","display_name":"Peter D. Turney","orcid":"https://orcid.org/0000-0003-0909-4085"},"institutions":[{"id":"https://openalex.org/I4210159778","display_name":"National Research Council Canada","ror":"https://ror.org/04mte1k06","country_code":"CA","type":"government","lineage":["https://openalex.org/I4210159778"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Turney, Peter D.","raw_affiliation_strings":["National Research Council of Canada"],"affiliations":[{"raw_affiliation_string":"National Research Council of Canada","institution_ids":["https://openalex.org/I4210159778"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5027819738"],"corresponding_institution_ids":["https://openalex.org/I4210159778"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word-sense-disambiguation","display_name":"Word-sense disambiguation","score":0.8108600974082947},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.7745059728622437},{"id":"https://openalex.org/keywords/semeval","display_name":"SemEval","score":0.7146344780921936},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6170660853385925},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5661731362342834},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.44433847069740295},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4336961507797241},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08877193927764893},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.06411466002464294}],"concepts":[{"id":"https://openalex.org/C51646954","wikidata":"https://www.wikidata.org/wiki/Q48522","display_name":"Word-sense disambiguation","level":3,"score":0.8108600974082947},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.7745059728622437},{"id":"https://openalex.org/C44572571","wikidata":"https://www.wikidata.org/wiki/Q7448970","display_name":"SemEval","level":3,"score":0.7146344780921936},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6170660853385925},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5661731362342834},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.44433847069740295},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4336961507797241},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08877193927764893},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.06411466002464294},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:cs/0407065","is_oa":true,"landing_page_url":"http://arxiv.org/abs/cs/0407065","pdf_url":"https://arxiv.org/pdf/cs/0407065","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.cs/0407065","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.cs/0407065","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:cs/0407065","is_oa":true,"landing_page_url":"http://arxiv.org/abs/cs/0407065","pdf_url":"https://arxiv.org/pdf/cs/0407065","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6800000071525574}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W12297830","https://openalex.org/W1486865875","https://openalex.org/W1567365482","https://openalex.org/W1987878450","https://openalex.org/W2035512259","https://openalex.org/W2093641143","https://openalex.org/W2123504579"],"related_works":["https://openalex.org/W2101293500","https://openalex.org/W2384058382","https://openalex.org/W2000205775","https://openalex.org/W2124313972","https://openalex.org/W2251529656","https://openalex.org/W2324822715","https://openalex.org/W2330879361","https://openalex.org/W2140343536","https://openalex.org/W2783863087","https://openalex.org/W2188275805"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"the":[3,16,41,65,70,79,82,86,99],"National":[4],"Research":[5],"Council":[6],"(NRC)":[7],"Word":[8],"Sense":[9],"Disambiguation":[10],"(WSD)":[11],"system,":[12],"as":[13,29,40,55],"applied":[14],"to":[15],"English":[17],"Lexical":[18],"Sample":[19],"(ELS)":[20],"task":[21],"in":[22,78],"Senseval-3.":[23],"The":[24,75,94],"NRC":[25],"system":[26,80],"approaches":[27],"WSD":[28],"a":[30,104,115],"classical":[31],"supervised":[32],"machine":[33,43],"learning":[34,44],"problem,":[35],"using":[36,98],"familiar":[37],"tools":[38],"such":[39],"Weka":[42],"software":[45],"and":[46,69],"Brill's":[47],"rule-based":[48],"part-of-speech":[49],"tagger.":[50],"Head":[51],"words":[52],"are":[53,67,73,96],"represented":[54],"feature":[56],"vectors":[57],"with":[58,103],"several":[59],"hundred":[60],"features.":[61],"Approximately":[62],"half":[63,72],"of":[64,106,110],"features":[66],"syntactic":[68],"other":[71],"semantic.":[74],"main":[76],"novelty":[77],"is":[81],"method":[83],"for":[84],"generating":[85],"semantic":[87],"features,":[88],"based":[89],"on":[90],"word":[91],"\\hbox{co-occurrence}":[92],"probabilities.":[93],"probabilities":[95],"estimated":[97],"Waterloo":[100],"MultiText":[101],"System":[102],"corpus":[105],"about":[107],"one":[108],"terabyte":[109],"unlabeled":[111],"text,":[112],"collected":[113],"by":[114],"web":[116],"crawler.":[117]},"counts_by_year":[{"year":2016,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
