{"id":"https://openalex.org/W2737251471","doi":"https://doi.org/10.18653/v1/e17-1086","title":"ShotgunWSD: An unsupervised algorithm for global word sense disambiguation inspired by DNA sequencing","display_name":"ShotgunWSD: An unsupervised algorithm for global word sense disambiguation inspired by DNA sequencing","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2737251471","doi":"https://doi.org/10.18653/v1/e17-1086","mag":"2737251471"},"language":"en","primary_location":{"id":"doi:10.18653/v1/e17-1086","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/e17-1086","pdf_url":"https://www.aclweb.org/anthology/E17-1086.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th Conference of the European Chapter of the\n          Association for Computational Linguistics: Volume 1, Long Papers","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/E17-1086.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030278465","display_name":"Andrei M. Butnaru","orcid":"https://orcid.org/0000-0003-3031-3203"},"institutions":[{"id":"https://openalex.org/I141595442","display_name":"University of Bucharest","ror":"https://ror.org/02x2v6p15","country_code":"RO","type":"education","lineage":["https://openalex.org/I141595442"]}],"countries":["RO"],"is_corresponding":true,"raw_author_name":"Andrei Butnaru","raw_affiliation_strings":["University of Bucharest Department of Computer Science 14 Academiei, Bucharest, Romania","University of Bucharest, Bucharest, Romania"],"affiliations":[{"raw_affiliation_string":"University of Bucharest Department of Computer Science 14 Academiei, Bucharest, Romania","institution_ids":["https://openalex.org/I141595442"]},{"raw_affiliation_string":"University of Bucharest, Bucharest, Romania","institution_ids":["https://openalex.org/I141595442"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081017623","display_name":"Radu Tudor Ionescu","orcid":"https://orcid.org/0000-0002-9301-1950"},"institutions":[{"id":"https://openalex.org/I141595442","display_name":"University of Bucharest","ror":"https://ror.org/02x2v6p15","country_code":"RO","type":"education","lineage":["https://openalex.org/I141595442"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Radu Tudor Ionescu","raw_affiliation_strings":["University of Bucharest Department of Computer Science 14 Academiei, Bucharest, Romania","University of Bucharest, Bucharest, Romania"],"affiliations":[{"raw_affiliation_string":"University of Bucharest Department of Computer Science 14 Academiei, Bucharest, Romania","institution_ids":["https://openalex.org/I141595442"]},{"raw_affiliation_string":"University of Bucharest, Bucharest, Romania","institution_ids":["https://openalex.org/I141595442"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080746661","display_name":"Florentina Hristea","orcid":"https://orcid.org/0000-0003-2142-9781"},"institutions":[{"id":"https://openalex.org/I141595442","display_name":"University of Bucharest","ror":"https://ror.org/02x2v6p15","country_code":"RO","type":"education","lineage":["https://openalex.org/I141595442"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Florentina Hristea","raw_affiliation_strings":["University of Bucharest Department of Computer Science 14 Academiei, Bucharest, Romania","University of Bucharest, Bucharest, Romania"],"affiliations":[{"raw_affiliation_string":"University of Bucharest Department of Computer Science 14 Academiei, Bucharest, Romania","institution_ids":["https://openalex.org/I141595442"]},{"raw_affiliation_string":"University of Bucharest, Bucharest, Romania","institution_ids":["https://openalex.org/I141595442"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5030278465"],"corresponding_institution_ids":["https://openalex.org/I141595442"],"apc_list":null,"apc_paid":null,"fwci":0.8308,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.80441474,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"916","last_page":"926"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9869999885559082,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9817000031471252,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7152847647666931},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.6539086103439331},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6275821924209595},{"id":"https://openalex.org/keywords/prefix","display_name":"Prefix","score":0.54051673412323},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5271874666213989},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.48616668581962585},{"id":"https://openalex.org/keywords/sliding-window-protocol","display_name":"Sliding window protocol","score":0.45666414499282837},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.45056337118148804},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.4214918315410614},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38045433163642883},{"id":"https://openalex.org/keywords/window","display_name":"Window (computing)","score":0.19585078954696655},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1685401201248169}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7152847647666931},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.6539086103439331},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6275821924209595},{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.54051673412323},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5271874666213989},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.48616668581962585},{"id":"https://openalex.org/C102392041","wikidata":"https://www.wikidata.org/wiki/Q592860","display_name":"Sliding window protocol","level":3,"score":0.45666414499282837},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.45056337118148804},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.4214918315410614},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38045433163642883},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.19585078954696655},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1685401201248169},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/e17-1086","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/e17-1086","pdf_url":"https://www.aclweb.org/anthology/E17-1086.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th Conference of the European Chapter of the\n          Association for Computational Linguistics: Volume 1, Long Papers","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1707.08084","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1707.08084","pdf_url":"https://arxiv.org/pdf/1707.08084","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2737251471","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1707.08084.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1707.08084","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1707.08084","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.18653/v1/e17-1086","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/e17-1086","pdf_url":"https://www.aclweb.org/anthology/E17-1086.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th Conference of the European Chapter of the\n          Association for Computational Linguistics: Volume 1, Long Papers","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320310086","display_name":"Helsingin Yliopisto","ror":"https://ror.org/040af2s02"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2737251471.pdf","grobid_xml":"https://content.openalex.org/works/W2737251471.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2963147736","https://openalex.org/W2770933788","https://openalex.org/W2912955678","https://openalex.org/W3008835696","https://openalex.org/W2400046211","https://openalex.org/W2913363441","https://openalex.org/W2767719051","https://openalex.org/W2183216435","https://openalex.org/W2187550388","https://openalex.org/W2389961636","https://openalex.org/W2008856942","https://openalex.org/W3157345281","https://openalex.org/W2962702703","https://openalex.org/W2580559125","https://openalex.org/W2169170568","https://openalex.org/W2736701013","https://openalex.org/W1541776932","https://openalex.org/W2417581944","https://openalex.org/W2226088910","https://openalex.org/W2588090883"],"abstract_inverted_index":{"In":[0,84],"this":[1],"paper,":[2],"we":[3],"present":[4],"a":[5,23,52,74,122,153,182,200],"novel":[6],"unsupervised":[7,144],"algorithm":[8,19,44,55,140,162,180],"for":[9,31,81],"word":[10,117,135],"sense":[11,79,90,114],"disambiguation":[12],"(WSD)":[13],"at":[14],"the":[15,27,37,68,85,113,128,134,168],"document":[16,69],"level.":[17],"Our":[18],"is":[20,45,56,118,188],"inspired":[21],"by":[22,109,152],"widely-used":[24],"approach":[25],"in":[26,70,132],"field":[28],"of":[29,77,115,186],"genetics":[30],"whole":[32],"genome":[33],"sequencing,":[34],"known":[35],"as":[36],"Shotgun":[38],"sequencing":[39],"technique.":[40],"The":[41,104],"proposed":[42],"WSD":[43,54,145],"based":[46,98,120],"on":[47,99,121,174],"three":[48],"main":[49],"steps.":[50],"First,":[51],"brute-force":[53],"applied":[57],"to":[58,63,72,190],"short":[59,75],"context":[60],"windows":[61],"(up":[62],"10":[64],"words)":[65],"selected":[66],"from":[67],"order":[71],"generate":[73],"list":[76],"likely":[78],"configurations":[80,91,97,106,131],"each":[82,116],"window.":[83],"second":[86],"step,":[87],"these":[88],"local":[89],"are":[92,107],"assembled":[93],"into":[94],"longer":[95],"composite":[96],"suffix":[100],"and":[101,112,147],"prefix":[102],"matching.":[103],"resulted":[105],"ranked":[108],"their":[110],"length,":[111],"chosen":[119],"voting":[123],"scheme":[124],"that":[125,160],"considers":[126],"only":[127],"top":[129],"k":[130],"which":[133],"appears.":[136],"We":[137,157],"compare":[138],"our":[139,161,179],"with":[141],"other":[142,195],"state-of-the-art":[143],"algorithms":[146],"demonstrate":[148],"better":[149,165],"performance,":[150],"sometimes":[151],"very":[154,183],"large":[155],"margin.":[156],"also":[158],"show":[159],"can":[163],"yield":[164],"performance":[166],"than":[167],"Most":[169],"Common":[170],"Sense":[171],"(MCS)":[172],"baseline":[173],"one":[175],"data":[176],"set.":[177],"Moreover,":[178],"has":[181],"small":[184],"number":[185],"parameters,":[187],"robust":[189],"parameter":[191],"tuning,":[192],"and,":[193],"unlike":[194],"bioinspired":[196],"methods,":[197],"it":[198],"gives":[199],"deterministic":[201],"solution":[202],"(it":[203],"does":[204],"not":[205],"involve":[206],"random":[207],"choices).":[208]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
