{"id":"https://openalex.org/W4401352021","doi":"https://doi.org/10.14778/3665844.3665857","title":"ArcheType: A Novel Framework for Open-Source Column Type Annotation Using Large Language Models","display_name":"ArcheType: A Novel Framework for Open-Source Column Type Annotation Using Large Language Models","publication_year":2024,"publication_date":"2024-05-01","ids":{"openalex":"https://openalex.org/W4401352021","doi":"https://doi.org/10.14778/3665844.3665857"},"language":"en","primary_location":{"id":"doi:10.14778/3665844.3665857","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3665844.3665857","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016656010","display_name":"Benjamin Feuer","orcid":null},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Benjamin Feuer","raw_affiliation_strings":["New York University"],"affiliations":[{"raw_affiliation_string":"New York University","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101564223","display_name":"Yurong Liu","orcid":"https://orcid.org/0000-0002-7974-9449"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yurong Liu","raw_affiliation_strings":["New York University"],"affiliations":[{"raw_affiliation_string":"New York University","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066142047","display_name":"Chinmay Hegde","orcid":"https://orcid.org/0000-0003-4574-8066"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chinmay Hegde","raw_affiliation_strings":["New York University"],"affiliations":[{"raw_affiliation_string":"New York University","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006773757","display_name":"Juliana Freire","orcid":"https://orcid.org/0000-0003-3915-7075"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Juliana Freire","raw_affiliation_strings":["New York University"],"affiliations":[{"raw_affiliation_string":"New York University","institution_ids":["https://openalex.org/I57206974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5016656010"],"corresponding_institution_ids":["https://openalex.org/I57206974"],"apc_list":null,"apc_paid":null,"fwci":6.8705,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.97332071,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"17","issue":"9","first_page":"2279","last_page":"2292"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/archetype","display_name":"Archetype","score":0.7505648732185364},{"id":"https://openalex.org/keywords/column","display_name":"Column (typography)","score":0.741075873374939},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.677078366279602},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6076456308364868},{"id":"https://openalex.org/keywords/type","display_name":"Type (biology)","score":0.5611189603805542},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.46540600061416626},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4180162847042084},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3764321804046631},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34525465965270996},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.12298154830932617},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.08897894620895386},{"id":"https://openalex.org/keywords/literature","display_name":"Literature","score":0.06185060739517212}],"concepts":[{"id":"https://openalex.org/C49848784","wikidata":"https://www.wikidata.org/wiki/Q131714","display_name":"Archetype","level":2,"score":0.7505648732185364},{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.741075873374939},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.677078366279602},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6076456308364868},{"id":"https://openalex.org/C2777299769","wikidata":"https://www.wikidata.org/wiki/Q3707858","display_name":"Type (biology)","level":2,"score":0.5611189603805542},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.46540600061416626},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4180162847042084},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3764321804046631},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34525465965270996},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.12298154830932617},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.08897894620895386},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.06185060739517212},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3665844.3665857","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3665844.3665857","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7599999904632568,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1593532658","https://openalex.org/W1852306145","https://openalex.org/W2106895292","https://openalex.org/W2914354916","https://openalex.org/W2965875055","https://openalex.org/W2979826702","https://openalex.org/W3013008430","https://openalex.org/W3037082750","https://openalex.org/W3082424964","https://openalex.org/W3166853427","https://openalex.org/W3189373506","https://openalex.org/W4226278401","https://openalex.org/W4297950604","https://openalex.org/W4312205996","https://openalex.org/W4321448364","https://openalex.org/W4365456672","https://openalex.org/W4378189609","https://openalex.org/W4380433117","https://openalex.org/W4380433214","https://openalex.org/W4392681182","https://openalex.org/W6676014748","https://openalex.org/W6775208358","https://openalex.org/W6855204527"],"related_works":["https://openalex.org/W3042248303","https://openalex.org/W2971731373","https://openalex.org/W2946570158","https://openalex.org/W2367903128","https://openalex.org/W2372449700","https://openalex.org/W2912650197","https://openalex.org/W2529014963","https://openalex.org/W2366000998","https://openalex.org/W3136706476","https://openalex.org/W4322709950"],"abstract_inverted_index":{"Existing":[0],"deep-learning":[1],"approaches":[2],"to":[3,99,120],"semantic":[4,15],"column":[5],"type":[6],"annotation":[7],"(CTA)":[8],"have":[9,55],"important":[10],"shortcomings:":[11],"they":[12],"rely":[13],"on":[14,44,61,137,168],"types":[16,49],"which":[17,94,146],"are":[18],"fixed":[19],"at":[20],"training":[21,28],"time;":[22],"require":[23],"a":[24,62,80,104,133,164],"large":[25,96],"number":[26],"of":[27,65,112],"samples":[29],"per":[30],"type;":[31],"incur":[32],"high":[33],"run-time":[34],"inference":[35],"costs;":[36],"and":[37,67,91,116,123,153],"their":[38,73],"performance":[39,60,136],"can":[40],"degrade":[41],"when":[42,48,154],"evaluated":[43],"novel":[45],"datasets,":[46],"even":[47],"remain":[50],"constant.":[51],"Large":[52],"language":[53,97],"models":[54,98],"exhibited":[56],"strong":[57],"zero-shot":[58,106,138],"classification":[59],"wide":[63],"range":[64],"tasks":[66],"in":[68,103,156],"this":[69,151],"paper":[70],"we":[71,147],"explore":[72],"use":[74],"for":[75,84],"CTA.":[76],"We":[77,108],"introduce":[78],"ArcheType,":[79],"simple,":[81],"practical":[82],"method":[83,114],"context":[85,121],"sampling,":[86],"prompt":[87],"serialization,":[88],"model":[89,167],"querying,":[90],"label":[92,124],"remapping,":[93],"enables":[95],"solve":[100],"CTA":[101,139,160],"problems":[102],"fully":[105],"manner.":[107],"ablate":[109],"each":[110],"component":[111],"our":[113],"separately,":[115],"establish":[117],"that":[118],"improvements":[119],"sampling":[122],"remapping":[125],"provide":[126],"the":[127,169],"most":[128],"consistent":[129],"gains.":[130],"ArcheType":[131],"establishes":[132],"new":[134,143],"state-of-the-art":[135],"benchmarks":[140,145],"(including":[141],"three":[142],"domain-specific":[144],"release":[148],"along":[149],"with":[150,158],"paper),":[152],"used":[155],"conjunction":[157],"classical":[159],"techniques,":[161],"it":[162],"outperforms":[163],"SOTA":[165],"DoDuo":[166],"fine-tuned":[170],"SOTAB":[171],"benchmark.":[172]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
