{"id":"https://openalex.org/W7139117499","doi":"https://doi.org/10.1016/j.datak.2026.102596","title":"How good are LLMs in disambiguating entities in tabular data? A comprehensive study","display_name":"How good are LLMs in disambiguating entities in tabular data? A comprehensive study","publication_year":2026,"publication_date":"2026-03-20","ids":{"openalex":"https://openalex.org/W7139117499","doi":"https://doi.org/10.1016/j.datak.2026.102596"},"language":"en","primary_location":{"id":"doi:10.1016/j.datak.2026.102596","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.datak.2026.102596","pdf_url":null,"source":{"id":"https://openalex.org/S136993123","display_name":"Data & Knowledge Engineering","issn_l":"0169-023X","issn":["0169-023X","1872-6933"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data &amp; Knowledge Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1016/j.datak.2026.102596","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Federico Belotti","orcid":"https://orcid.org/0009-0008-0140-3318"},"institutions":[{"id":"https://openalex.org/I66752286","display_name":"University of Milano-Bicocca","ror":"https://ror.org/01ynf4891","country_code":"IT","type":"education","lineage":["https://openalex.org/I66752286"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Federico Belotti","raw_affiliation_strings":["University of Milano-Bicocca, viale Sarca 336, Milan, 20126, Italy"],"raw_orcid":"https://orcid.org/0009-0008-0140-3318","affiliations":[{"raw_affiliation_string":"University of Milano-Bicocca, viale Sarca 336, Milan, 20126, Italy","institution_ids":["https://openalex.org/I66752286"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129890214","display_name":"Marco Cremaschi","orcid":null},"institutions":[{"id":"https://openalex.org/I66752286","display_name":"University of Milano-Bicocca","ror":"https://ror.org/01ynf4891","country_code":"IT","type":"education","lineage":["https://openalex.org/I66752286"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marco Cremaschi","raw_affiliation_strings":["University of Milano-Bicocca, viale Sarca 336, Milan, 20126, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Milano-Bicocca, viale Sarca 336, Milan, 20126, Italy","institution_ids":["https://openalex.org/I66752286"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129806416","display_name":"Fabio Dadda","orcid":null},"institutions":[{"id":"https://openalex.org/I66752286","display_name":"University of Milano-Bicocca","ror":"https://ror.org/01ynf4891","country_code":"IT","type":"education","lineage":["https://openalex.org/I66752286"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Fabio Dadda","raw_affiliation_strings":["University of Milano-Bicocca, viale Sarca 336, Milan, 20126, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Milano-Bicocca, viale Sarca 336, Milan, 20126, Italy","institution_ids":["https://openalex.org/I66752286"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130040433","display_name":"Roberto Avogadro","orcid":null},"institutions":[{"id":"https://openalex.org/I173888879","display_name":"SINTEF","ror":"https://ror.org/01f677e56","country_code":"NO","type":"facility","lineage":["https://openalex.org/I173888879"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Roberto Avogadro","raw_affiliation_strings":["SINTEF, Forskningsveien 1, Oslo, 0373, Norway"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SINTEF, Forskningsveien 1, Oslo, 0373, Norway","institution_ids":["https://openalex.org/I173888879"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065845420","display_name":"Matteo Palmonari","orcid":"https://orcid.org/0000-0002-1801-5118"},"institutions":[{"id":"https://openalex.org/I66752286","display_name":"University of Milano-Bicocca","ror":"https://ror.org/01ynf4891","country_code":"IT","type":"education","lineage":["https://openalex.org/I66752286"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Matteo Palmonari","raw_affiliation_strings":["University of Milano-Bicocca, viale Sarca 336, Milan, 20126, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Milano-Bicocca, viale Sarca 336, Milan, 20126, Italy","institution_ids":["https://openalex.org/I66752286"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I66752286"],"apc_list":{"value":2590,"currency":"USD","value_usd":2590},"apc_paid":{"value":2590,"currency":"USD","value_usd":2590},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.63913378,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"164","issue":null,"first_page":"102596","last_page":"102596"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14330","display_name":"Library Science and Information Systems","score":0.43799999356269836,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14330","display_name":"Library Science and Information Systems","score":0.43799999356269836,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.10440000146627426,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.0868000015616417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.3070000112056732},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.27480000257492065},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.27320000529289246}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4668000042438507},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.3610999882221222},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3070000112056732},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.28780001401901245},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.2809999883174896},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27320000529289246},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2596000134944916},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.25450000166893005},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.23989999294281006}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.datak.2026.102596","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.datak.2026.102596","pdf_url":null,"source":{"id":"https://openalex.org/S136993123","display_name":"Data & Knowledge Engineering","issn_l":"0169-023X","issn":["0169-023X","1872-6933"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data &amp; Knowledge Engineering","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.datak.2026.102596","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.datak.2026.102596","pdf_url":null,"source":{"id":"https://openalex.org/S136993123","display_name":"Data & Knowledge Engineering","issn_l":"0169-023X","issn":["0169-023X","1872-6933"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data &amp; Knowledge Engineering","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320338019","display_name":"FP7 Coordination of Research Activities","ror":null},{"id":"https://openalex.org/F5497039910","display_name":"Ministero dell'Istruzione e del Merito","ror":"https://ror.org/01ehyh486"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2080133951","https://openalex.org/W2111869785","https://openalex.org/W2898796029","https://openalex.org/W3014705052","https://openalex.org/W3030452136","https://openalex.org/W4281826654","https://openalex.org/W4282983436","https://openalex.org/W4309563570","https://openalex.org/W4366835631","https://openalex.org/W4389146960","https://openalex.org/W4389945730","https://openalex.org/W4398243240","https://openalex.org/W4400337099","https://openalex.org/W4402165975","https://openalex.org/W4412834578","https://openalex.org/W4414982091"],"related_works":[],"abstract_inverted_index":{"Tables":[0],"are":[1,144],"crucial":[2],"containers":[3],"of":[4,44,55,101,137,176,207,214],"information,":[5],"but":[6],"understanding":[7],"their":[8],"meaning":[9],"may":[10],"be":[11],"challenging.":[12],"Over":[13],"the":[14,39,42,135,154,174,181,187,196,211,220],"years,":[15],"there":[16],"has":[17,49],"been":[18,33,65],"a":[19,52,69,121,191],"surge":[20],"in":[21,23,153,164,205,219],"interest":[22],"data-driven":[24],"approaches":[25,56,62,85,106,178],"based":[26],"on":[27,68,91,190],"deep":[28],"learning":[29],"that":[30],"have":[31,63],"increasingly":[32],"combined":[34],"with":[35,86,120,210],"heuristic-based":[36,138],"ones.":[37],"In":[38,94],"last":[40],"period,":[41],"advent":[43],"Large":[45],"Language":[46],"Models":[47],"(LLMs)":[48],"led":[50],"to":[51,125,134,172,179],"new":[53,216],"category":[54],"for":[57],"table":[58],"annotation.":[59],"However,":[60],"these":[61,177],"not":[64],"consistently":[66],"evaluated":[67],"common":[70],"ground,":[71],"making":[72],"evaluation":[73,100,123,155,193],"and":[74,88,116,131,142,147,158,195,198],"comparison":[75],"difficult.":[76],"This":[77],"work":[78],"uniquely":[79],"compares":[80],"Semantic":[81],"Table":[82],"Interpretation":[83],"(STI)":[84],"generative":[87],"encoder-only":[89,146],"LLMs":[90],"diverse":[92],"datasets.":[93],"particular,":[95],"we":[96],"conduct":[97],"an":[98],"extensive":[99],"four":[102],"STI":[103],"state-of-the-art":[104],"(SOTA)":[105],"\u2014":[107],"Alligator":[108,130],"(formerly":[109],"s-elBat":[110],"),":[111],"TURL":[112,141],",":[113,115,160],"TableLlama":[114,143],"DAGOBAH":[117,132],"(the":[118],"latter":[119],"partial":[122],"due":[124],"its":[126],"high":[127],"computational":[128,197,208],"demands);":[129],"belong":[133],"family":[136],"algorithms,":[139],"while":[140],"respectively":[145],"decoder-only":[148],"LLMs.":[149],"We":[150],"also":[151],"include":[152],"both":[156,186],"GPT-4o":[157],"GPT-4o-mini":[159],"since":[161],"they":[162],"excel":[163],"various":[165],"public":[166],"benchmarks.":[167],"The":[168],"primary":[169],"objective":[170],"is":[171],"measure":[173],"ability":[175],"solve":[180],"entity":[182],"disambiguation":[183],"task":[184],"concerning":[185],"performance":[188],"achieved":[189],"common-ground":[192],"setting":[194],"cost":[199],"requirements":[200],"involved,":[201],"either":[202],"monetary":[203],"or":[204],"terms":[206],"resources,":[209],"ultimate":[212],"aim":[213],"charting":[215],"research":[217],"paths":[218],"field.":[221]},"counts_by_year":[],"updated_date":"2026-04-02T13:48:15.688549","created_date":"2026-03-20T00:00:00"}
