{"id":"https://openalex.org/W2952490462","doi":"https://doi.org/10.18653/v1/p19-1187","title":"Boosting Entity Linking Performance by Leveraging Unlabeled Documents","display_name":"Boosting Entity Linking Performance by Leveraging Unlabeled Documents","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2952490462","doi":"https://doi.org/10.18653/v1/p19-1187","mag":"2952490462"},"language":"en","primary_location":{"id":"doi:10.18653/v1/p19-1187","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1187","pdf_url":"https://www.aclweb.org/anthology/P19-1187.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/P19-1187.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061504918","display_name":"Phong Ba Le","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Phong Le","raw_affiliation_strings":["University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086717154","display_name":"Ivan Titov","orcid":"https://orcid.org/0000-0002-2583-1893"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]},{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB","NL"],"is_corresponding":true,"raw_author_name":"Ivan Titov","raw_affiliation_strings":["University of Amsterdam","University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam","institution_ids":["https://openalex.org/I887064364"]},{"raw_affiliation_string":"University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5086717154"],"corresponding_institution_ids":["https://openalex.org/I887064364","https://openalex.org/I98677209"],"apc_list":null,"apc_paid":null,"fwci":4.4797,"has_fulltext":true,"cited_by_count":45,"citation_normalized_percentile":{"value":0.95640446,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1935","last_page":"1945"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.983299970626831,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.888809323310852},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.6412979960441589},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6006374955177307},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5872035622596741},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5595570802688599},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.52525395154953},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.5081329345703125},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5065814256668091},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4979217052459717},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.49020296335220337},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.47113245725631714},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.4598598778247833},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.443459689617157},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4329189360141754},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38446542620658875}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.888809323310852},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.6412979960441589},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6006374955177307},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5872035622596741},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5595570802688599},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.52525395154953},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.5081329345703125},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5065814256668091},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4979217052459717},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.49020296335220337},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.47113245725631714},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.4598598778247833},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.443459689617157},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4329189360141754},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38446542620658875},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/p19-1187","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1187","pdf_url":"https://www.aclweb.org/anthology/P19-1187.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},{"id":"pmh:oai:dare.uva.nl:openaire/b9b334ba-28a1-44d3-847c-b1f2bd22d1d0","is_oa":true,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/boosting-entity-linking-performance-by-leveraging-unlabeled-documents(b9b334ba-28a1-44d3-847c-b1f2bd22d1d0).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Le, P & Titov, I 2019, Boosting Entity Linking Performance by Leveraging Unlabeled Documents. in A Korhonen, D Traum & L M\u00e0rquez (eds), The 57th Annual Meeting of the Association for Computational Linguistics : ACL 2019 : proceedings of the conference : July 28-August 2, 2019, Florence, Italy. Stroudsburg, PA, pp. 1935-1945, The 57th Annual Meeting of the Association for Computational Linguistics - ACL 2019, 28/07/19. https://doi.org/10.18653/v1/P19-1187","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:uvapub:oai:dare.uva.nl:publications/b9b334ba-28a1-44d3-847c-b1f2bd22d1d0","is_oa":true,"landing_page_url":"https://dare.uva.nl/personal/pure/en/publications/boosting-entity-linking-performance-by-leveraging-unlabeled-documents(b9b334ba-28a1-44d3-847c-b1f2bd22d1d0).html","pdf_url":"https://dare.uva.nl/personal/pure/en/publications/boosting-entity-linking-performance-by-leveraging-unlabeled-documents(b9b334ba-28a1-44d3-847c-b1f2bd22d1d0).html","source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"The 57th Annual Meeting of the Association for Computational Linguistics: ACL 2019 : proceedings of the conference : July 28-August 2, 2019, Florence, Italy, 1935 - 1945","raw_type":"info:eu-repo/semantics/conferencepaper"},{"id":"pmh:oai:dare.uva.nl:openaire_cris_publications/b9b334ba-28a1-44d3-847c-b1f2bd22d1d0","is_oa":true,"landing_page_url":"https://vimeo.com/384532543","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Le, P & Titov, I 2019, Boosting Entity Linking Performance by Leveraging Unlabeled Documents. in A Korhonen, D Traum & L M\u00e0rquez (eds), The 57th Annual Meeting of the Association for Computational Linguistics : ACL 2019 : proceedings of the conference : July 28-August 2, 2019, Florence, Italy. Stroudsburg, PA, pp. 1935-1945, The 57th Annual Meeting of the Association for Computational Linguistics - ACL 2019, 28/07/19. https://doi.org/10.18653/v1/P19-1187","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.18653/v1/p19-1187","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1187","pdf_url":"https://www.aclweb.org/anthology/P19-1187.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6899999976158142,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G5226226009","display_name":"Scaling Semantic Parsing to Unrestricted Domains","funder_award_id":"639.022.518","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310598","display_name":"Amazon Web Services","ror":"https://ror.org/04mv4n011"},{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2952490462.pdf","grobid_xml":"https://content.openalex.org/works/W2952490462.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W11298561","https://openalex.org/W174427690","https://openalex.org/W1604644367","https://openalex.org/W1647671624","https://openalex.org/W2099926189","https://openalex.org/W2100341149","https://openalex.org/W2107598941","https://openalex.org/W2117729721","https://openalex.org/W2121127625","https://openalex.org/W2132679783","https://openalex.org/W2133227439","https://openalex.org/W2151048449","https://openalex.org/W2177768736","https://openalex.org/W2250758064","https://openalex.org/W2250869925","https://openalex.org/W2251894552","https://openalex.org/W2409706897","https://openalex.org/W2479758238","https://openalex.org/W2594284271","https://openalex.org/W2612773933","https://openalex.org/W2963212722","https://openalex.org/W2963691861","https://openalex.org/W2963855739","https://openalex.org/W2963866616"],"related_works":["https://openalex.org/W1987706094","https://openalex.org/W2797776314","https://openalex.org/W2001121861","https://openalex.org/W3193088696","https://openalex.org/W4303683898","https://openalex.org/W1459710595","https://openalex.org/W3150234497","https://openalex.org/W3165388794","https://openalex.org/W2889705046","https://openalex.org/W1018798833"],"abstract_inverted_index":{"Modern":[0],"entity":[1,70],"linking":[2,71],"systems":[3,116],"rely":[4],"on":[5,83,95,102,117,133],"large":[6],"collections":[7],"of":[8,37,47,86,98,152],"documents":[9,31,160],"specifically":[10],"annotated":[11],"for":[12,50],"the":[13,60,108,127,139,144],"task":[14],"(e.g.,":[15],"AIDA":[16],"CoNLL).":[17],"In":[18],"contrast,":[19],"we":[20,41,58,155],"propose":[21],"an":[22,54],"approach":[23,35,112],"which":[24],"exploits":[25],"only":[26],"naturally":[27],"occurring":[28],"information:":[29],"unlabeled":[30,55,159],"and":[32,101],"Wikipedia.":[33],"Our":[34],"consists":[36],"two":[38],"stages.":[39],"First,":[40],"construct":[42],"a":[43,84,134],"high":[44],"recall":[45],"list":[46],"candidate":[48,61],"entities":[49,76,92,106],"each":[51,99],"mention":[52,100],"in":[53,107,126],"document.":[56,109],"Second,":[57],"use":[59],"lists":[62],"as":[63,77],"weak":[64],"supervision":[65],"to":[66,90,142,149],"constrain":[67],"our":[68,153],"document-level":[69],"model.":[72],"The":[73,110],"model":[74],"treats":[75],"latent":[78],"variables":[79],"and,":[80],"when":[81,131],"estimated":[82],"collection":[85],"unlabelled":[87],"texts,":[88],"learns":[89],"choose":[91],"relying":[93],"both":[94],"local":[96],"context":[97],"coherence":[103],"with":[104,167],"other":[105],"resulting":[111],"rivals":[113],"fully-supervised":[114],"state-of-the-art":[115],"standard":[118],"test":[119,135],"sets.":[120],"It":[121],"also":[122],"approaches":[123],"their":[124],"performance":[125],"very":[128],"challenging":[129],"setting:":[130],"tested":[132],"set":[136],"sampled":[137],"from":[138],"data":[140],"used":[141],"estimate":[143],"supervised":[145],"systems.":[146],"By":[147],"comparing":[148],"Wikipedia-only":[150],"training":[151],"model,":[154],"demonstrate":[156],"that":[157],"modeling":[158],"is":[161],"beneficial.":[162],"Mr.":[163],"Trump":[164],"discussed":[165],"Brexit":[166],"Mrs.":[168],"May":[169],".":[170],"Donald_Trump":[171],"(*)":[172],"Donald_Trump_Jr.":[173],"Melania_Trump":[174],"Ivanka_Trump":[175],"Trump_(card_games)":[176],"Trump_(surname)":[177],"Trump_(video_gamer)":[178],"Trump_(magazine)":[179],"Trump,_Colorado":[180],"...":[181]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":3}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
