{"id":"https://openalex.org/W7127429488","doi":"https://doi.org/10.48550/arxiv.2602.01193","title":"Bridging Lexical Ambiguity and Vision: A Mini Review on Visual Word Sense Disambiguation","display_name":"Bridging Lexical Ambiguity and Vision: A Mini Review on Visual Word Sense Disambiguation","publication_year":2026,"publication_date":"2026-02-01","ids":{"openalex":"https://openalex.org/W7127429488","doi":"https://doi.org/10.48550/arxiv.2602.01193"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.01193","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124927329","display_name":"Shashini Nilukshi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nilukshi, Shashini","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5094255504","display_name":"Deshan Sumanathilaka","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sumanathilaka, Deshan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9505000114440918,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9505000114440918,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.014700000174343586,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11148","display_name":"Language, Metaphor, and Cognition","score":0.0034000000450760126,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.6972000002861023},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.5364999771118164},{"id":"https://openalex.org/keywords/polysemy","display_name":"Polysemy","score":0.4034000039100647},{"id":"https://openalex.org/keywords/word-sense-disambiguation","display_name":"Word-sense disambiguation","score":0.38670000433921814},{"id":"https://openalex.org/keywords/reciprocal","display_name":"Reciprocal","score":0.38449999690055847},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.3675000071525574},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.3458000123500824}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7056000232696533},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.6972000002861023},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6108999848365784},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5644000172615051},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.5364999771118164},{"id":"https://openalex.org/C2780276568","wikidata":"https://www.wikidata.org/wiki/Q191928","display_name":"Polysemy","level":2,"score":0.4034000039100647},{"id":"https://openalex.org/C51646954","wikidata":"https://www.wikidata.org/wiki/Q48522","display_name":"Word-sense disambiguation","level":3,"score":0.38670000433921814},{"id":"https://openalex.org/C2777742833","wikidata":"https://www.wikidata.org/wiki/Q1964083","display_name":"Reciprocal","level":2,"score":0.38449999690055847},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.3675000071525574},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3662000000476837},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.3458000123500824},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.33649998903274536},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.3107999861240387},{"id":"https://openalex.org/C2780876879","wikidata":"https://www.wikidata.org/wiki/Q3054749","display_name":"Meaning (existential)","level":2,"score":0.3061999976634979},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.29429998993873596},{"id":"https://openalex.org/C44083865","wikidata":"https://www.wikidata.org/wiki/Q3853443","display_name":"Mean reciprocal rank","level":2,"score":0.2849000096321106},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.01193","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.01193","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.01193","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.01193","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6235516667366028,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"offers":[2],"a":[3,14,158],"mini":[4],"review":[5,58],"of":[6,17,50,96,135,160,176],"Visual":[7],"Word":[8,19],"Sense":[9,20],"Disambiguation":[10,21],"(VWSD),":[11],"which":[12],"is":[13],"multimodal":[15,64],"extension":[16],"traditional":[18],"(WSD).":[22],"VWSD":[23,41,97,125],"helps":[24],"tackle":[25],"lexical":[26,39],"ambiguity":[27],"in":[28,139,151],"vision-language":[29],"tasks.":[30],"While":[31],"conventional":[32],"WSD":[33],"depends":[34],"only":[35],"on":[36,107],"text":[37,55],"and":[38,79,101,111,123,163,181,191],"resources,":[40],"uses":[42],"visual":[43],"cues":[44],"to":[45,67,88,92,113,137],"find":[46],"the":[47,94,164,173,185],"right":[48],"meaning":[49],"ambiguous":[51],"words":[52],"with":[53],"minimal":[54],"input.":[56],"The":[57,170],"looks":[59],"at":[60],"developments":[61],"from":[62,86],"early":[63],"fusion":[65],"methods":[66],"new":[68],"frameworks":[69],"that":[70,119],"use":[71],"contrastive":[72,102],"models":[73,122],"like":[74],"CLIP,":[75],"diffusion-based":[76],"text-to-image":[77],"generation,":[78,180],"large":[80],"language":[81],"model":[82,153],"(LLM)":[83],"support.":[84],"Studies":[85],"2016":[87],"2025":[89],"are":[90],"examined":[91],"show":[93,118],"growth":[95],"through":[98],"feature-based,":[99],"graph-based,":[100],"embedding":[103],"techniques.":[104],"It":[105],"focuses":[106],"prompt":[108],"engineering,":[109],"fine-tuning,":[110],"adapting":[112],"multiple":[114],"languages.":[115],"Quantitative":[116],"results":[117],"CLIP-based":[120],"fine-tuned":[121],"LLM-enhanced":[124],"systems":[126],"consistently":[127],"perform":[128],"better":[129,167],"than":[130],"zero-shot":[131],"baselines,":[132],"achieving":[133],"gains":[134],"up":[136],"6-8\\%":[138],"Mean":[140],"Reciprocal":[141],"Rank":[142],"(MRR).":[143],"However,":[144],"challenges":[145],"still":[146],"exist,":[147],"such":[148],"as":[149,184],"limitations":[150],"context,":[152],"bias":[154],"toward":[155],"common":[156],"meanings,":[157],"lack":[159],"multilingual":[161,192],"datasets,":[162],"need":[165],"for":[166,188],"evaluation":[168],"frameworks.":[169],"analysis":[171],"highlights":[172],"growing":[174],"overlap":[175],"CLIP":[177],"alignment,":[178],"diffusion":[179],"LLM":[182],"reasoning":[183],"future":[186],"path":[187],"strong,":[189],"context-aware,":[190],"disambiguation":[193],"systems.":[194]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-04T00:00:00"}
