{"id":"https://openalex.org/W2897507397","doi":"https://doi.org/10.18653/v1/w18-6304","title":"An Analysis of Attention Mechanisms: The Case of Word Sense Disambiguation in Neural Machine Translation","display_name":"An Analysis of Attention Mechanisms: The Case of Word Sense Disambiguation in Neural Machine Translation","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2897507397","doi":"https://doi.org/10.18653/v1/w18-6304","mag":"2897507397"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w18-6304","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w18-6304","pdf_url":"https://www.aclweb.org/anthology/W18-6304.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Third Conference on Machine Translation: Research Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W18-6304.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056246401","display_name":"Gongbo Tang","orcid":"https://orcid.org/0009-0003-0443-3217"},"institutions":[{"id":"https://openalex.org/I123387679","display_name":"Uppsala University","ror":"https://ror.org/048a87296","country_code":"SE","type":"education","lineage":["https://openalex.org/I123387679"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Gongbo Tang","raw_affiliation_strings":["Department of Linguistics and Philology, Uppsala University"],"affiliations":[{"raw_affiliation_string":"Department of Linguistics and Philology, Uppsala University","institution_ids":["https://openalex.org/I123387679"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005771535","display_name":"Rico Sennrich","orcid":"https://orcid.org/0000-0002-1438-4741"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]},{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["CH","GB"],"is_corresponding":false,"raw_author_name":"Rico Sennrich","raw_affiliation_strings":["Institute of Computational Linguistics, University of Zurich","School of Informatics, University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"Institute of Computational Linguistics, University of Zurich","institution_ids":["https://openalex.org/I202697423"]},{"raw_affiliation_string":"School of Informatics, University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077063699","display_name":"Joakim Nivre","orcid":null},"institutions":[{"id":"https://openalex.org/I123387679","display_name":"Uppsala University","ror":"https://ror.org/048a87296","country_code":"SE","type":"education","lineage":["https://openalex.org/I123387679"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Joakim Nivre","raw_affiliation_strings":["Department of Linguistics and Philology, Uppsala University"],"affiliations":[{"raw_affiliation_string":"Department of Linguistics and Philology, Uppsala University","institution_ids":["https://openalex.org/I123387679"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5056246401"],"corresponding_institution_ids":["https://openalex.org/I123387679"],"apc_list":null,"apc_paid":null,"fwci":14.3841,"has_fulltext":true,"cited_by_count":107,"citation_normalized_percentile":{"value":0.99011055,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"26","last_page":"35"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9782999753952026,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8303886651992798},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.801841139793396},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6723688840866089},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6075067520141602},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5960187315940857},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5488923788070679},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5458195805549622},{"id":"https://openalex.org/keywords/noun","display_name":"Noun","score":0.5416077375411987},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.522266685962677},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.49727848172187805},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.49159273505210876},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.47062352299690247},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1940923035144806}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8303886651992798},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.801841139793396},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6723688840866089},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6075067520141602},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5960187315940857},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5488923788070679},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5458195805549622},{"id":"https://openalex.org/C121934690","wikidata":"https://www.wikidata.org/wiki/Q1084","display_name":"Noun","level":2,"score":0.5416077375411987},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.522266685962677},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.49727848172187805},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.49159273505210876},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.47062352299690247},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1940923035144806},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.18653/v1/w18-6304","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w18-6304","pdf_url":"https://www.aclweb.org/anthology/W18-6304.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Third Conference on Machine Translation: Research Papers","raw_type":"proceedings-article"},{"id":"pmh:oai:www.zora.uzh.ch:170307","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401281","display_name":"Zurich Open Repository and Archive (University of Zurich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I202697423","host_organization_name":"University of Zurich","host_organization_lineage":["https://openalex.org/I202697423"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"Tang, Gongbo; Sennrich, Rico; Nivre, Joakim  (2018). An Analysis of Attention Mechanisms: The Case of Word Sense Disambiguation in Neural Machine Translation.  In: Proceedings of the Third Conference on Machine Translation, Brussels, 31 October 2018 - 1 November 2018, Association for Computational Linguistics.","raw_type":"Conference or Workshop Item"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/90a211de-1102-4e24-a66b-d555190a842f","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/90a211de-1102-4e24-a66b-d555190a842f","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Tang, G, Sennrich, R & Nivre, J 2018, An Analysis of Attention Mechanisms: The Case of Word Sense Disambiguation in Neural Machine Translation. in EMNLP 2018 THIRD CONFERENCE ON MACHINE TRANSLATION (WMT18). Brussels, Belgium, pp. 26-35, EMNLP 2018 Third Conference on Machine Translation (WMT18), Brussels, Belgium, 31/10/18. < http://aclweb.org/anthology/W18-6304 >","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/90a211de-1102-4e24-a66b-d555190a842f","is_oa":true,"landing_page_url":"http://aclweb.org/anthology/W18-6304","pdf_url":null,"source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"doi:10.5167/uzh-170307","is_oa":true,"landing_page_url":"https://doi.org/10.5167/uzh-170307","pdf_url":null,"source":{"id":"https://openalex.org/S7407051291","display_name":"Universit\u00e4t Z\u00fcrich, ZORA","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.18653/v1/w18-6304","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w18-6304","pdf_url":"https://www.aclweb.org/anthology/W18-6304.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Third Conference on Machine Translation: Research Papers","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6499999761581421}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2897507397.pdf","grobid_xml":"https://content.openalex.org/works/W2897507397.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1753482797","https://openalex.org/W1902237438","https://openalex.org/W2101454539","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2148708890","https://openalex.org/W2157331557","https://openalex.org/W2186615578","https://openalex.org/W2613904329","https://openalex.org/W2759173152","https://openalex.org/W2778814079","https://openalex.org/W2798761464","https://openalex.org/W2808154809","https://openalex.org/W2888539709","https://openalex.org/W2962697716","https://openalex.org/W2962784628","https://openalex.org/W2963403868","https://openalex.org/W2963506925","https://openalex.org/W2963715460","https://openalex.org/W2964174820","https://openalex.org/W2964265128","https://openalex.org/W2964308564","https://openalex.org/W3082674894","https://openalex.org/W4241645538","https://openalex.org/W4298170715","https://openalex.org/W4385245566","https://openalex.org/W4386506836"],"related_works":["https://openalex.org/W3176018525","https://openalex.org/W2903533908","https://openalex.org/W2903810591","https://openalex.org/W3026554633","https://openalex.org/W2187606256","https://openalex.org/W2888520903","https://openalex.org/W2890256614","https://openalex.org/W3098873988","https://openalex.org/W2949454572","https://openalex.org/W3066373881"],"abstract_inverted_index":{"Recent":[0],"work":[1],"has":[2],"shown":[3],"that":[4,47,73,98,119,144],"the":[5,17,35,62,83,102,131,136,145,158,167],"encoderdecoder":[6],"attention":[7,32,48,52,63,74,81,99,137],"mechanisms":[8,49,75],"in":[9,20,34,42,91,130,139],"neural":[10],"machine":[11,22],"translation":[12],"(NMT)":[13],"are":[14,76],"different":[15],"from":[16,166],"word":[18,38],"alignment":[19],"statistical":[21],"translation.":[23],"In":[24],"this":[25],"paper,":[26],"we":[27,71,142],"focus":[28],"on":[29],"analyzing":[30],"encoder-decoder":[31],"mechanisms,":[33],"case":[36],"of":[37],"sense":[39],"disambiguation":[40],"(WSD)":[41],"NMT":[43,107,120],"models.":[44],"We":[45,60,96],"hypothesize":[46],"pay":[50],"more":[51,80],"to":[53,78,82,93,109,123,151,163],"context":[54,89,171],"tokens":[55,156],"when":[56,66],"translating":[57,67],"ambiguous":[58,68,84],"words.":[59],"explore":[61],"distribution":[64],"patterns":[65],"nouns.":[69,95],"Counterintuitively,":[70],"find":[72],"likely":[77],"distribute":[79],"noun":[85],"itself":[86],"rather":[87],"than":[88],"tokens,":[90],"comparison":[92],"other":[94],"conclude":[97],"is":[100],"not":[101],"main":[103],"mechanism":[104,138],"used":[105],"by":[106],"models":[108,121],"incorporate":[110],"contextual":[111,125],"information":[112,126],"for":[113,128],"WSD.":[114],"The":[115],"experimental":[116],"results":[117],"suggest":[118],"learn":[122,150,162],"encode":[124],"necessary":[127],"WSD":[129],"encoder":[132],"hidden":[133],"states.":[134],"For":[135],"Transformer":[140],"models,":[141],"reveal":[143],"first":[146],"few":[147,160],"layers":[148,161],"gradually":[149],"\"align\"":[152],"source":[153],"and":[154,157],"target":[155],"last":[159],"extract":[164],"features":[165],"related":[168],"but":[169],"unaligned":[170],"tokens.":[172]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":19},{"year":2020,"cited_by_count":45},{"year":2019,"cited_by_count":19},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2019-06-27T00:00:00"}
