{"id":"https://openalex.org/W4386977134","doi":"https://doi.org/10.48550/arxiv.2309.11506","title":"Matching Table Metadata with Business Glossaries Using Large Language Models","display_name":"Matching Table Metadata with Business Glossaries Using Large Language Models","publication_year":2023,"publication_date":"2023-09-08","ids":{"openalex":"https://openalex.org/W4386977134","doi":"https://doi.org/10.48550/arxiv.2309.11506"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2309.11506","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.11506","pdf_url":"https://arxiv.org/pdf/2309.11506","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2309.11506","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092925931","display_name":"Elita Lobo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lobo, Elita","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068065546","display_name":"Oktie Hassanzadeh","orcid":"https://orcid.org/0000-0001-5307-9857"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hassanzadeh, Oktie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011485086","display_name":"Nhan H. Pham","orcid":"https://orcid.org/0000-0002-4490-8649"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pham, Nhan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079757419","display_name":"Nandana Mihindukulasooriya","orcid":"https://orcid.org/0000-0003-1707-4842"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mihindukulasooriya, Nandana","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052501780","display_name":"Dharmashankar Subramanian","orcid":"https://orcid.org/0000-0002-1990-7740"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Subramanian, Dharmashankar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5035277014","display_name":"Horst Samulowitz","orcid":"https://orcid.org/0000-0002-6780-3217"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Samulowitz, Horst","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5092925931"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.91063392162323},{"id":"https://openalex.org/keywords/glossary","display_name":"Glossary","score":0.8797442317008972},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.796203076839447},{"id":"https://openalex.org/keywords/column","display_name":"Column (typography)","score":0.6207624077796936},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6069853901863098},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5957918167114258},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.5887565612792969},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5398200750350952},{"id":"https://openalex.org/keywords/data-element","display_name":"Data element","score":0.46948012709617615},{"id":"https://openalex.org/keywords/metadata-repository","display_name":"Metadata repository","score":0.4620702862739563},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.29664450883865356},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.285883367061615},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19455981254577637},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.15387418866157532}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.91063392162323},{"id":"https://openalex.org/C2780031656","wikidata":"https://www.wikidata.org/wiki/Q859161","display_name":"Glossary","level":2,"score":0.8797442317008972},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.796203076839447},{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.6207624077796936},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6069853901863098},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5957918167114258},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.5887565612792969},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5398200750350952},{"id":"https://openalex.org/C30872290","wikidata":"https://www.wikidata.org/wiki/Q1172389","display_name":"Data element","level":3,"score":0.46948012709617615},{"id":"https://openalex.org/C153048206","wikidata":"https://www.wikidata.org/wiki/Q3454922","display_name":"Metadata repository","level":3,"score":0.4620702862739563},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.29664450883865356},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.285883367061615},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19455981254577637},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.15387418866157532},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2309.11506","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.11506","pdf_url":"https://arxiv.org/pdf/2309.11506","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2309.11506","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2309.11506","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2309.11506","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.11506","pdf_url":"https://arxiv.org/pdf/2309.11506","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4386977134.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2379265733","https://openalex.org/W2183628870","https://openalex.org/W2374379029","https://openalex.org/W3084772717","https://openalex.org/W1503116306","https://openalex.org/W2170906434","https://openalex.org/W2189603309","https://openalex.org/W3199469221","https://openalex.org/W2545809202","https://openalex.org/W2360250168"],"abstract_inverted_index":{"Enterprises":[0],"often":[1],"own":[2],"large":[3,12,176],"collections":[4,21],"of":[5,11,43,72,91,159,175,248],"structured":[6],"data":[7,17,20,36,81,109],"in":[8,207],"the":[9,35,41,62,70,89,108,136,173,246],"form":[10],"databases":[13],"or":[14,94,103,121],"an":[15,92],"enterprise":[16],"lake.":[18],"Such":[19],"come":[22],"with":[23,221],"limited":[24],"metadata":[25,75],"and":[26,46,83,100,127,149,165,191,199,238],"strict":[27],"access":[28,33,106],"policies":[29],"that":[30,58,155,185,204,218],"could":[31],"limit":[32,40],"to":[34,76,107,113,117,134,143,180,227],"contents":[37],"and,":[38],"therefore,":[39],"application":[42],"classic":[44],"retrieval":[45,99],"analysis":[47,101],"solutions.":[48],"As":[49],"a":[50,54,77,157,233],"result,":[51],"there":[52,231],"is":[53,116,232],"need":[55,142],"for":[56,98,215],"solutions":[57],"can":[59,192,219],"effectively":[60],"utilize":[61,205],"available":[63,93],"metadata.":[64],"In":[65,168],"this":[66,114,169],"paper,":[67],"we":[68,171],"study":[69],"problem":[71,115],"matching":[73,87,183,222],"table":[74],"business":[78,96,153],"glossary":[79,97,128,239],"containing":[80],"labels":[82],"descriptions.":[84,167,240],"The":[85],"resulting":[86],"enables":[88],"use":[90,118],"curated":[95],"without":[102],"before":[104],"requesting":[105],"contents.":[110],"One":[111],"solution":[112],"manually-defined":[119],"rules":[120],"similarity":[122],"measures":[123],"on":[124],"column":[125,197,216,236],"names":[126,198,217,237],"descriptions":[129],"(or":[130],"their":[131],"vector":[132],"embeddings)":[133],"find":[135],"closest":[137],"match.":[138],"However,":[139],"such":[140],"approaches":[141],"be":[144],"tuned":[145],"through":[146],"manual":[147,189],"labeling":[148],"cannot":[150],"handle":[151],"many":[152],"glossaries":[154],"contain":[156],"combination":[158],"simple":[160],"as":[161,163],"well":[162],"complex":[164,194],"long":[166],"work,":[170],"leverage":[172],"power":[174],"language":[177],"models":[178],"(LLMs)":[179],"design":[181],"generic":[182],"methods":[184,203],"do":[186],"not":[187],"require":[188],"tuning":[190],"identify":[193],"relations":[195],"between":[196,235],"glossaries.":[200],"We":[201],"propose":[202],"LLMs":[206,226],"two":[208],"ways:":[209],"a)":[210],"by":[211,224],"generating":[212],"additional":[213],"context":[214],"aid":[220],"b)":[223],"using":[225],"directly":[228],"infer":[229],"if":[230],"relation":[234],"Our":[241],"preliminary":[242],"experimental":[243],"results":[244],"show":[245],"effectiveness":[247],"our":[249],"proposed":[250],"methods.":[251]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2023-09-23T00:00:00"}
