{"id":"https://openalex.org/W7155387890","doi":"https://doi.org/10.1109/access.2026.3686891","title":"LongSemAnnotator: A Longformer Framework for Column Type Annotation","display_name":"LongSemAnnotator: A Longformer Framework for Column Type Annotation","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7155387890","doi":"https://doi.org/10.1109/access.2026.3686891"},"language":"en","primary_location":{"id":"doi:10.1109/access.2026.3686891","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3686891","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2026.3686891","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Tiago Santos","orcid":"https://orcid.org/0009-0000-5616-4141"},"institutions":[{"id":"https://openalex.org/I83558840","display_name":"Universidade Nova de Lisboa","ror":"https://ror.org/02xankh89","country_code":"PT","type":"education","lineage":["https://openalex.org/I83558840"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Tiago Santos","raw_affiliation_strings":["NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisbon, Portugal"],"raw_orcid":"https://orcid.org/0009-0000-5616-4141","affiliations":[{"raw_affiliation_string":"NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisbon, Portugal","institution_ids":["https://openalex.org/I83558840"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114312941","display_name":"Yuriy Perezhohin","orcid":null},"institutions":[{"id":"https://openalex.org/I83558840","display_name":"Universidade Nova de Lisboa","ror":"https://ror.org/02xankh89","country_code":"PT","type":"education","lineage":["https://openalex.org/I83558840"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Yuriy Perezhohin","raw_affiliation_strings":["NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisbon, Portugal"],"raw_orcid":"https://orcid.org/0009-0004-1046-7883","affiliations":[{"raw_affiliation_string":"NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisbon, Portugal","institution_ids":["https://openalex.org/I83558840"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094000405","display_name":"Victor Costa","orcid":null},"institutions":[{"id":"https://openalex.org/I83558840","display_name":"Universidade Nova de Lisboa","ror":"https://ror.org/02xankh89","country_code":"PT","type":"education","lineage":["https://openalex.org/I83558840"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Victor Costa","raw_affiliation_strings":["NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisbon, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisbon, Portugal","institution_ids":["https://openalex.org/I83558840"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054467954","display_name":"Fernando Peres","orcid":null},"institutions":[{"id":"https://openalex.org/I83558840","display_name":"Universidade Nova de Lisboa","ror":"https://ror.org/02xankh89","country_code":"PT","type":"education","lineage":["https://openalex.org/I83558840"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Fernando Peres","raw_affiliation_strings":["NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisbon, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisbon, Portugal","institution_ids":["https://openalex.org/I83558840"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087976149","display_name":"Mauro Castelli","orcid":"https://orcid.org/0000-0002-8793-1451"},"institutions":[{"id":"https://openalex.org/I83558840","display_name":"Universidade Nova de Lisboa","ror":"https://ror.org/02xankh89","country_code":"PT","type":"education","lineage":["https://openalex.org/I83558840"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Mauro Castelli","raw_affiliation_strings":["NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisbon, Portugal"],"raw_orcid":"https://orcid.org/0000-0002-8793-1451","affiliations":[{"raw_affiliation_string":"NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisbon, Portugal","institution_ids":["https://openalex.org/I83558840"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.61524768,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":null,"first_page":"63050","last_page":"63062"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.1965000033378601,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.1965000033378601,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.10450000315904617,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0640999972820282,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/column","display_name":"Column (typography)","score":0.6879000067710876},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.45170000195503235},{"id":"https://openalex.org/keywords/type","display_name":"Type (biology)","score":0.3944999873638153},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3750999867916107},{"id":"https://openalex.org/keywords/algorithm-design","display_name":"Algorithm design","score":0.2793000042438507}],"concepts":[{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.6879000067710876},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6485999822616577},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.45170000195503235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40939998626708984},{"id":"https://openalex.org/C2777299769","wikidata":"https://www.wikidata.org/wiki/Q3707858","display_name":"Type (biology)","level":2,"score":0.3944999873638153},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3750999867916107},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33079999685287476},{"id":"https://openalex.org/C106516650","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm design","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2687999904155731},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2578999996185303}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2026.3686891","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3686891","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:02ef2093d94b4e3b85bd8a918b5b1a90","is_oa":true,"landing_page_url":"https://doaj.org/article/02ef2093d94b4e3b85bd8a918b5b1a90","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 14, Pp 63050-63062 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2026.3686891","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3686891","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G152798224","display_name":null,"funder_award_id":"101084013","funder_id":"https://openalex.org/F8712659396","funder_display_name":"European Health and Digital Executive Agency"},{"id":"https://openalex.org/G2444725599","display_name":"Information Management Research Center","funder_award_id":"UID/04152/2025","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G6120512878","display_name":"Information Management Research Center","funder_award_id":"UID/PRR/04152/2025","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"}],"funders":[{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"},{"id":"https://openalex.org/F8712659396","display_name":"European Health and Digital Executive Agency","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Column":[0],"type":[1],"annotation":[2,117,209],"(CTA)":[3],"is":[4],"a":[5,68,102],"critical":[6],"task":[7],"in":[8,186],"semantic":[9,15,30,86,193,218],"table":[10,21],"interpretation":[11],"(STI),":[12],"where":[13],"meaningful":[14],"labels":[16],"are":[17],"automatically":[18],"assigned":[19],"to":[20,71,84,111,182],"columns.":[22],"This":[23],"process":[24],"enriches":[25],"tabular":[26],"data":[27,37],"with":[28,53,143,204,214],"essential":[29],"context,":[31],"making":[32],"it":[33],"fundamental":[34],"for":[35,88,225],"efficient":[36],"integration,":[38],"cataloging,":[39],"governance,":[40],"and":[41,56,75,140,162,172,220],"the":[42,73,91,116,121,136,144,151,174,183,187,200],"development":[43],"of":[44,77,93],"natural":[45],"language":[46],"database":[47],"interfaces.":[48],"Many":[49],"existing":[50],"methods":[51],"struggle":[52],"large":[54],"tables":[55],"integrating":[57],"context":[58,114,171,224],"from":[59],"multiple":[60],"tables.":[61,98],"To":[62],"address":[63],"these":[64],"challenges,":[65],"we":[66],"introduce":[67],"novel":[69],"approach":[70,125],"improve":[72],"accuracy":[74,180],"scalability":[76],"CTA.":[78],"Our":[79],"framework":[80],"employs":[81],"sentence":[82],"transformers":[83],"generate":[85],"embeddings":[87],"columns,":[89],"enabling":[90],"identification":[92],"similar":[94,196],"columns":[95,215],"across":[96],"different":[97],"We":[99,148],"then":[100],"utilize":[101],"Longformer-based":[103],"model,":[104],"which":[105],"can":[106],"handle":[107],"long":[108],"input":[109],"sequences,":[110],"incorporate":[112],"inter-table":[113,170],"into":[115],"process.":[118],"Evaluated":[119],"on":[120,135,154],"SOTAB":[122],"benchmark,":[123],"our":[124],"achieves":[126],"competitive":[127],"overall":[128],"performance,":[129],"reaching":[130],"82.86":[131],"micro-F1":[132,142],"when":[133],"trained":[134],"full":[137],"training":[138,146],"split":[139],"79.13":[141],"small":[145],"split.":[147],"also":[149],"investigate":[150],"model\u2019s":[152],"robustness":[153],"test":[155],"sets":[156],"containing":[157],"missing":[158],"values,":[159],"format":[160],"inconsistencies,":[161],"edge":[163],"cases.":[164],"Experimental":[165],"results":[166],"show":[167],"that":[168],"incorporating":[169],"using":[173],"Longformer":[175],"model":[176],"significantly":[177],"improves":[178],"CTA":[179],"compared":[181],"best":[184],"models":[185],"benchmark.":[188],"Detailed":[189],"error":[190],"analysis":[191],"identifies":[192],"overlap":[194,219],"between":[195],"label":[197],"types":[198],"as":[199],"primary":[201],"remaining":[202],"challenge,":[203],"retrieval":[205],"quality":[206],"directly":[207],"constraining":[208],"accuracy.":[210],"However,":[211],"challenges":[212],"remain":[213],"exhibiting":[216],"high":[217],"those":[221],"requiring":[222],"intra-table":[223],"disambiguation.":[226]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-24T00:00:00"}
