{"id":"https://openalex.org/W3145728363","doi":"https://doi.org/10.1145/3514221.3517906","title":"Annotating Columns with Pre-trained Language Models","display_name":"Annotating Columns with Pre-trained Language Models","publication_year":2022,"publication_date":"2022-06-10","ids":{"openalex":"https://openalex.org/W3145728363","doi":"https://doi.org/10.1145/3514221.3517906","mag":"3145728363"},"language":"en","primary_location":{"id":"doi:10.1145/3514221.3517906","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3514221.3517906","pdf_url":null,"source":{"id":"https://openalex.org/S4363608845","display_name":"Proceedings of the 2022 International Conference on Management of Data","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2104.01785","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072690374","display_name":"Yoshihiko Suhara","orcid":"https://orcid.org/0000-0001-7554-2865"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yoshihiko Suhara","raw_affiliation_strings":["Megagon Labs, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Megagon Labs, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100428948","display_name":"Jinfeng Li","orcid":"https://orcid.org/0000-0001-9462-2625"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jinfeng Li","raw_affiliation_strings":["Megagon Labs, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Megagon Labs, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100750716","display_name":"Yuliang Li","orcid":"https://orcid.org/0000-0002-0602-149X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuliang Li","raw_affiliation_strings":["Megagon Labs, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Megagon Labs, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100456041","display_name":"Dan Zhang","orcid":"https://orcid.org/0000-0002-7295-4837"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dan Zhang","raw_affiliation_strings":["Megagon Labs, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Megagon Labs, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027360874","display_name":"\u00c7a\u011fatay Demiralp","orcid":"https://orcid.org/0009-0003-2080-0443"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"\u00c7a\u011fatay Demiralp","raw_affiliation_strings":["Sigma Computing, San Fransisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Sigma Computing, San Fransisco, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100418485","display_name":"Chen Chen","orcid":"https://orcid.org/0000-0002-7099-7905"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen Chen","raw_affiliation_strings":["Megagon Labs, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Megagon Labs, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026007789","display_name":"Wang-Chiew Tan","orcid":"https://orcid.org/0009-0003-4659-2291"},"institutions":[{"id":"https://openalex.org/I3197470489","display_name":"Alpha Omega Alpha Medical Honor Society","ror":"https://ror.org/057q9nn35","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I3197470489"]},{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wang-Chiew Tan","raw_affiliation_strings":["Meta AI, Menlo Park, CA, USA"],"affiliations":[{"raw_affiliation_string":"Meta AI, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I3197470489","https://openalex.org/I4210099336"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5072690374"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":16.5923,"has_fulltext":false,"cited_by_count":67,"citation_normalized_percentile":{"value":0.9952381,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1493","last_page":"1503"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/column","display_name":"Column (typography)","score":0.8681614398956299},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.814535915851593},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.7434487342834473},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6012346148490906},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.5001485347747803},{"id":"https://openalex.org/keywords/toolbox","display_name":"Toolbox","score":0.4546816647052765},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3949691653251648},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3833080530166626},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3461254835128784},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34456750750541687},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3037908971309662}],"concepts":[{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.8681614398956299},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.814535915851593},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.7434487342834473},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6012346148490906},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.5001485347747803},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.4546816647052765},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3949691653251648},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3833080530166626},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3461254835128784},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34456750750541687},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3037908971309662},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3514221.3517906","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3514221.3517906","pdf_url":null,"source":{"id":"https://openalex.org/S4363608845","display_name":"Proceedings of the 2022 International Conference on Management of Data","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 International Conference on Management of Data","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2104.01785","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.01785","pdf_url":"https://arxiv.org/pdf/2104.01785","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2104.01785","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.01785","pdf_url":"https://arxiv.org/pdf/2104.01785","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.8100000023841858,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":83,"referenced_works":["https://openalex.org/W1483236033","https://openalex.org/W1614298861","https://openalex.org/W1614862348","https://openalex.org/W2008896880","https://openalex.org/W2009591769","https://openalex.org/W2070491211","https://openalex.org/W2092364718","https://openalex.org/W2094728533","https://openalex.org/W2111869785","https://openalex.org/W2123878016","https://openalex.org/W2157060173","https://openalex.org/W2168859760","https://openalex.org/W2250539671","https://openalex.org/W2493916176","https://openalex.org/W2556468274","https://openalex.org/W2624871570","https://openalex.org/W2753709519","https://openalex.org/W2788142400","https://openalex.org/W2889003264","https://openalex.org/W2889249015","https://openalex.org/W2896457183","https://openalex.org/W2898796029","https://openalex.org/W2904530076","https://openalex.org/W2913340405","https://openalex.org/W2914746235","https://openalex.org/W2923014074","https://openalex.org/W2941366772","https://openalex.org/W2946417913","https://openalex.org/W2948145720","https://openalex.org/W2950577311","https://openalex.org/W2951621897","https://openalex.org/W2963310665","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2963722008","https://openalex.org/W2964185501","https://openalex.org/W2965875055","https://openalex.org/W2969723769","https://openalex.org/W2970476646","https://openalex.org/W2970971581","https://openalex.org/W2971681342","https://openalex.org/W2972324944","https://openalex.org/W2979826702","https://openalex.org/W2986266667","https://openalex.org/W2997200074","https://openalex.org/W3002709689","https://openalex.org/W3010144884","https://openalex.org/W3013008430","https://openalex.org/W3014705052","https://openalex.org/W3015468748","https://openalex.org/W3025624935","https://openalex.org/W3035140194","https://openalex.org/W3035231859","https://openalex.org/W3035428952","https://openalex.org/W3037082750","https://openalex.org/W3044438666","https://openalex.org/W3082197983","https://openalex.org/W3082424964","https://openalex.org/W3085139254","https://openalex.org/W3093907107","https://openalex.org/W3094024803","https://openalex.org/W3095464614","https://openalex.org/W3098824823","https://openalex.org/W3102659883","https://openalex.org/W3103177583","https://openalex.org/W3112302456","https://openalex.org/W3113112245","https://openalex.org/W3118485687","https://openalex.org/W3119752913","https://openalex.org/W3123375411","https://openalex.org/W3129639992","https://openalex.org/W3145728363","https://openalex.org/W3155299751","https://openalex.org/W3165753548","https://openalex.org/W3165814564","https://openalex.org/W3168052339","https://openalex.org/W3174181645","https://openalex.org/W3208057978","https://openalex.org/W3209042722","https://openalex.org/W4205922070","https://openalex.org/W4288269198","https://openalex.org/W4295312788","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4205140848","https://openalex.org/W2068663075","https://openalex.org/W2978678743","https://openalex.org/W2797837731","https://openalex.org/W4393677513","https://openalex.org/W4390832911","https://openalex.org/W829257147","https://openalex.org/W4385302116","https://openalex.org/W2150344375","https://openalex.org/W3123448197"],"abstract_inverted_index":{"Inferring":[0],"meta":[1],"information":[2,54],"about":[3],"tables,":[4],"such":[5],"as":[6,21,77],"column":[7,45,81,101,105],"headers":[8],"or":[9],"relationships":[10,49],"between":[11,50],"columns,":[12],"is":[13],"an":[14],"active":[15],"research":[16],"topic":[17],"in":[18],"data":[19,153],"management":[20],"we":[22,35],"find":[23],"many":[24],"tables":[25],"are":[26],"missing":[27],"some":[28],"of":[29,39,132,148],"this":[30,33],"information.":[31],"In":[32],"paper,":[34],"study":[36],"the":[37,48,56,74,100,124,146],"problem":[38,155],"annotating":[40],"table":[41,57,76],"columns":[42],"(i.e.,":[43],"predicting":[44],"types":[46],"and":[47,79,104,113,144],"columns)":[51],"using":[52,83],"only":[53,134],"from":[55],"itself.":[58],"We":[59,117,139],"develop":[60],"a":[61,84,129,141,151,157],"multi-task":[62],"learning":[63],"framework":[64],"(called":[65],"Doduo)":[66],"based":[67],"on":[68,96,150],"pre-trained":[69],"language":[70],"models,":[71],"which":[72],"takes":[73],"entire":[75],"input":[78],"predicts":[80],"types/relations":[82],"single":[85],"model.":[86],"Experimental":[87],"results":[88],"show":[89],"that":[90,119],"Doduo":[91,120,149],"establishes":[92],"new":[93],"state-of-the-art":[94,126],"performance":[95,127],"two":[97],"benchmarks":[98],"for":[99],"type":[102],"prediction":[103,107],"relation":[106],"tasks":[108],"with":[109,128],"up":[110],"to":[111],"4.0%":[112],"11.9%":[114],"improvements,":[115],"respectively.":[116],"report":[118],"can":[121],"already":[122],"outperform":[123],"previous":[125],"minimal":[130],"number":[131],"tokens,":[133],"8":[135],"tokens":[136],"per":[137],"column.":[138],"release":[140],"toolbox":[142],"(https://github.com/megagonlabs/doduo)":[143],"confirm":[145],"effectiveness":[147],"real-world":[152],"science":[154],"through":[156],"case":[158],"study.":[159]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":23},{"year":2024,"cited_by_count":19},{"year":2023,"cited_by_count":17},{"year":2022,"cited_by_count":5}],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
