{"id":"https://openalex.org/W4402810096","doi":"https://doi.org/10.3233/ssw240006","title":"Zero-Shot Topic Classification of Column Headers: Leveraging LLMs for Metadata Enrichment","display_name":"Zero-Shot Topic Classification of Column Headers: Leveraging LLMs for Metadata Enrichment","publication_year":2024,"publication_date":"2024-09-11","ids":{"openalex":"https://openalex.org/W4402810096","doi":"https://doi.org/10.3233/ssw240006"},"language":"en","primary_location":{"id":"doi:10.3233/ssw240006","is_oa":true,"landing_page_url":"https://doi.org/10.3233/ssw240006","pdf_url":null,"source":{"id":"https://openalex.org/S4210172742","display_name":"Studies on the semantic web","issn_l":"2215-0870","issn":["2215-0870"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"ebook platform"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies on the Semantic Web","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/ssw240006","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078978666","display_name":"Margherita Martorana","orcid":"https://orcid.org/0000-0001-8004-0464"},"institutions":[{"id":"https://openalex.org/I865915315","display_name":"Vrije Universiteit Amsterdam","ror":"https://ror.org/008xxew50","country_code":"NL","type":"education","lineage":["https://openalex.org/I865915315"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Margherita Martorana","raw_affiliation_strings":["Department of Computer Science, Vrije Universiteit Amsterdam, De Boelelaan 1105, Amsterdam, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0001-8004-0464","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Vrije Universiteit Amsterdam, De Boelelaan 1105, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I865915315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016394820","display_name":"Tobias Kuhn","orcid":"https://orcid.org/0000-0002-1267-0234"},"institutions":[{"id":"https://openalex.org/I865915315","display_name":"Vrije Universiteit Amsterdam","ror":"https://ror.org/008xxew50","country_code":"NL","type":"education","lineage":["https://openalex.org/I865915315"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Tobias Kuhn","raw_affiliation_strings":["Department of Computer Science, Vrije Universiteit Amsterdam, De Boelelaan 1105, Amsterdam, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0002-1267-0234","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Vrije Universiteit Amsterdam, De Boelelaan 1105, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I865915315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001824063","display_name":"Lise Stork","orcid":"https://orcid.org/0000-0002-2146-4803"},"institutions":[{"id":"https://openalex.org/I865915315","display_name":"Vrije Universiteit Amsterdam","ror":"https://ror.org/008xxew50","country_code":"NL","type":"education","lineage":["https://openalex.org/I865915315"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Lise Stork","raw_affiliation_strings":["Department of Computer Science, Vrije Universiteit Amsterdam, De Boelelaan 1105, Amsterdam, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0002-2146-4803","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Vrije Universiteit Amsterdam, De Boelelaan 1105, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I865915315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042220566","display_name":"Jacco van Ossenbruggen","orcid":"https://orcid.org/0000-0002-7748-4715"},"institutions":[{"id":"https://openalex.org/I865915315","display_name":"Vrije Universiteit Amsterdam","ror":"https://ror.org/008xxew50","country_code":"NL","type":"education","lineage":["https://openalex.org/I865915315"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Jacco van Ossenbruggen","raw_affiliation_strings":["Department of Computer Science, Vrije Universiteit Amsterdam, De Boelelaan 1105, Amsterdam, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0002-7748-4715","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Vrije Universiteit Amsterdam, De Boelelaan 1105, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I865915315"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I865915315"],"apc_list":null,"apc_paid":null,"fwci":2.0838,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.88375796,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9800999760627747,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9800999760627747,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9632999897003174,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9470000267028809,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.8365089297294617},{"id":"https://openalex.org/keywords/column","display_name":"Column (typography)","score":0.6377339363098145},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.6055261492729187},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5659597516059875},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.5255985260009766},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.36812257766723633},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.34217411279678345},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.10229435563087463},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.08760941028594971},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.05516993999481201}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.8365089297294617},{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.6377339363098145},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.6055261492729187},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5659597516059875},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.5255985260009766},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36812257766723633},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.34217411279678345},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.10229435563087463},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.08760941028594971},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.05516993999481201},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3233/ssw240006","is_oa":true,"landing_page_url":"https://doi.org/10.3233/ssw240006","pdf_url":null,"source":{"id":"https://openalex.org/S4210172742","display_name":"Studies on the semantic web","issn_l":"2215-0870","issn":["2215-0870"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"ebook platform"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies on the Semantic Web","raw_type":"book-chapter"},{"id":"pmh:oai:research.vu.nl:openaire/5ae62c15-4ab7-4258-b304-eba457f37d98","is_oa":true,"landing_page_url":"https://research.vu.nl/en/publications/5ae62c15-4ab7-4258-b304-eba457f37d98","pdf_url":null,"source":{"id":"https://openalex.org/S4306401107","display_name":"VU Research Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I865915315","host_organization_name":"Vrije Universiteit Amsterdam","host_organization_lineage":["https://openalex.org/I865915315"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Martorana, M, Kuhn, T, Stork, L & Ossenbruggen, J V 2024, Zero-Shot Topic Classification of Column Headers: Leveraging LLMs for Metadata Enrichment. in A A Salatino, M Alam, F Ongenae, S Vahdati, A L Gentile, T Pellegrini & S Jiang (eds), Knowledge Graphs in the Age of Language Models and Neuro-Symbolic AI : Proceedings of the 20th InternationalConference on Semantic Systems, 17-19 September 2024, Amsterdam, The Netherlands. Studies on the Semantic Web, vol. 60, IOS Press, pp. 52-66. https://doi.org/10.3233/SSW240006","raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"pmh:oai:research.vu.nl:publications/5ae62c15-4ab7-4258-b304-eba457f37d98","is_oa":true,"landing_page_url":"https://hdl.handle.net/1871.1/5ae62c15-4ab7-4258-b304-eba457f37d98","pdf_url":null,"source":{"id":"https://openalex.org/S4306401107","display_name":"VU Research Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I865915315","host_organization_name":"Vrije Universiteit Amsterdam","host_organization_lineage":["https://openalex.org/I865915315"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Martorana, M, Kuhn, T, Stork, L & Ossenbruggen, J V 2024, Zero-Shot Topic Classification of Column Headers: Leveraging LLMs for Metadata Enrichment. in A A Salatino, M Alam, F Ongenae, S Vahdati, A L Gentile, T Pellegrini & S Jiang (eds), Knowledge Graphs in the Age of Language Models and Neuro-Symbolic AI : Proceedings of the 20th InternationalConference on Semantic Systems, 17-19 September 2024, Amsterdam, The Netherlands. Studies on the Semantic Web, vol. 60, IOS Press, pp. 52-66. https://doi.org/10.3233/SSW240006","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"doi:10.3233/ssw240006","is_oa":true,"landing_page_url":"https://doi.org/10.3233/ssw240006","pdf_url":null,"source":{"id":"https://openalex.org/S4210172742","display_name":"Studies on the semantic web","issn_l":"2215-0870","issn":["2215-0870"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"ebook platform"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies on the Semantic Web","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8592584310","display_name":"ODISSEI: Better Infrastructure, Better Science, Better Society","funder_award_id":"184.035.014","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"}],"funders":[{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2392768766","https://openalex.org/W2058118494","https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2382021449","https://openalex.org/W2095118173","https://openalex.org/W2773965352","https://openalex.org/W2104269053","https://openalex.org/W2106424170","https://openalex.org/W1985426483"],"abstract_inverted_index":{"Traditional":[0],"dataset":[1,152,235],"retrieval":[2,236],"systems":[3],"rely":[4],"on":[5,11,63,68,154,186,248],"metadata":[6,18,44,231],"for":[7,198],"indexing,":[8],"rather":[9],"than":[10],"the":[12,72,94,100,113,117,120,126,129,146,155,219,226,238,249],"underlying":[13],"data":[14,247],"values.":[15],"However,":[16],"high-quality":[17],"creation":[19],"and":[20,30,58,138,163,214,237,242],"enrichment":[21,45],"often":[22],"require":[23],"manual":[24],"annotations,":[25],"which":[26],"is":[27],"a":[28,40,81,90,107,192,205,209],"labour-intensive":[29],"challenging":[31],"process":[32],"to":[33,42,228],"automate.":[34],"In":[35],"this":[36],"study,":[37],"we":[38,144,177],"propose":[39],"method":[41],"support":[43],"using":[46,204],"topic":[47,96,121,199],"annotations":[48],"generated":[49],"by":[50],"three":[51],"Large":[52,108,215],"Language":[53],"Models":[54],"(LLMs):":[55],"ChatGPT-3.5,":[56],"GoogleBard,":[57],"GoogleGemini.":[59],"Our":[60,86,158],"analysis":[61],"focuses":[62],"classifying":[64],"column":[65,202],"headers":[66,203],"based":[67],"domain-specific":[69],"topics":[70],"from":[71],"Consortium":[73],"of":[74,115,119,128,133,148,169,201,212,245],"European":[75],"Social":[76],"Science":[77],"Data":[78,83],"Archives":[79],"(CESSDA),":[80],"Linked":[82],"controlled":[84,95,206],"vocabulary.":[85],"approach":[87,194,224],"operates":[88],"in":[89,131,167],"zero-shot":[91],"setting,":[92],"integrating":[93],"vocabulary":[97],"directly":[98],"within":[99,218],"input":[101],"prompt.":[102],"This":[103,189,223],"integration":[104],"serves":[105],"as":[106,172,174],"Context":[109,216],"Windows":[110,217],"approach,":[111],"with":[112,140],"aim":[114],"improving":[116],"results":[118],"classification":[122,156,200],"task.":[123],"We":[124],"evaluated":[125],"performance":[127],"LLMs":[130,197,213],"terms":[132,168],"internal":[134,170],"consistency,":[135],"inter-machine":[136],"alignment,":[137],"agreement":[139],"human":[141],"classification.":[142],"Additionally,":[143],"investigate":[145],"impact":[147,185],"contextual":[149,180],"information":[150,181],"(i.e.,":[151],"description)":[153],"outcomes.":[157],"findings":[159],"suggest":[160],"that":[161,179,195],"ChatGPT":[162],"GoogleGemini":[164],"outperform":[165],"GoogleBard":[166],"consistency":[171],"well":[173],"LLM-human-agreement.":[175],"Interestingly,":[176],"found":[178],"had":[182],"no":[183],"significant":[184],"LLM":[187],"performance.":[188],"work":[190],"proposes":[191],"novel":[193],"leverages":[196],"vocabulary,":[207],"presenting":[208],"practical":[210],"application":[211],"Semantic":[220],"Web":[221],"domain.":[222],"has":[225],"potential":[227],"facilitate":[229],"automated":[230],"enrichment,":[232],"thereby":[233],"enhancing":[234],"Findability,":[239],"Accessibility,":[240],"Interoperability,":[241],"Reusability":[243],"(FAIR)":[244],"research":[246],"Web.":[250]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-07-02T09:51:11.867554","created_date":"2025-10-10T00:00:00"}
