{"id":"https://openalex.org/W3166853427","doi":"https://doi.org/10.1145/3588710","title":"GitTables: A Large-Scale Corpus of Relational Tables","display_name":"GitTables: A Large-Scale Corpus of Relational Tables","publication_year":2023,"publication_date":"2023-05-26","ids":{"openalex":"https://openalex.org/W3166853427","doi":"https://doi.org/10.1145/3588710","mag":"3166853427"},"language":"en","primary_location":{"id":"doi:10.1145/3588710","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3588710","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3588710","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058441702","display_name":"Madelon Hulsebos","orcid":"https://orcid.org/0000-0002-0949-7290"},"institutions":[{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Madelon Hulsebos","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027360874","display_name":"\u00c7a\u011fatay Demiralp","orcid":"https://orcid.org/0009-0003-2080-0443"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"\u00c7agatay Demiralp","raw_affiliation_strings":["Sigma Computing, San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Sigma Computing, San Francisco, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034924491","display_name":"Paul Groth","orcid":"https://orcid.org/0000-0003-0183-6910"},"institutions":[{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Paul Groth","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5058441702"],"corresponding_institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"],"apc_list":null,"apc_paid":null,"fwci":0.7646,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.72416435,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"1","issue":"1","first_page":"1","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8624982833862305},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.7259143590927124},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6212465167045593},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6182730197906494},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.6030173897743225},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5806161165237427},{"id":"https://openalex.org/keywords/column","display_name":"Column (typography)","score":0.5031589865684509},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.46063196659088135},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44830313324928284},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.44743812084198},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.23047733306884766},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09695523977279663}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8624982833862305},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.7259143590927124},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6212465167045593},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6182730197906494},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.6030173897743225},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5806161165237427},{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.5031589865684509},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.46063196659088135},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44830313324928284},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44743812084198},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.23047733306884766},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09695523977279663},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.1145/3588710","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3588710","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},{"id":"pmh:oai:dare.uva.nl:openaire/9291f5fa-f4f7-4e39-b4fe-e8f16009598a","is_oa":true,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/gittables-a-largescale-corpus-of-relational-tables(9291f5fa-f4f7-4e39-b4fe-e8f16009598a).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Hulsebos, M, Demiralp, \u00c7 & Groth, P 2023, 'GitTables: A Large-Scale Corpus of Relational Tables', Proceedings of the ACM on Management of Data, vol. 1, no. 1, 30. https://doi.org/10.1145/3588710","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:arXiv.org:2106.07258","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.07258","pdf_url":"https://arxiv.org/pdf/2106.07258","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:dare.uva.nl:publications/9291f5fa-f4f7-4e39-b4fe-e8f16009598a","is_oa":true,"landing_page_url":"https://hdl.handle.net/11245.1/9291f5fa-f4f7-4e39-b4fe-e8f16009598a","pdf_url":"https://pure.uva.nl/ws/files/182457735/GitTables.pdf","source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Hulsebos, M, Demiralp, \u00c7 & Groth, P 2023, 'GitTables: A Large-Scale Corpus of Relational Tables', Proceedings of the ACM on Management of Data, vol. 1, no. 1, 30. https://doi.org/10.1145/3588710","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"mag:3166853427","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2106.07258.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2106.07258","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2106.07258","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.1145/3588710","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3588710","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W102708294","https://openalex.org/W2108223890","https://openalex.org/W2108598243","https://openalex.org/W2119828234","https://openalex.org/W2162020046","https://openalex.org/W2181616869","https://openalex.org/W2196674927","https://openalex.org/W2262562434","https://openalex.org/W2290320465","https://openalex.org/W2341748398","https://openalex.org/W2493916176","https://openalex.org/W2522705507","https://openalex.org/W2525739395","https://openalex.org/W2528816126","https://openalex.org/W2537262135","https://openalex.org/W2604190938","https://openalex.org/W2626982577","https://openalex.org/W2889133671","https://openalex.org/W2891177506","https://openalex.org/W2902696364","https://openalex.org/W2919115771","https://openalex.org/W2941366772","https://openalex.org/W2951621897","https://openalex.org/W2963341956","https://openalex.org/W2971274354","https://openalex.org/W2996657743","https://openalex.org/W3004210300","https://openalex.org/W3008881932","https://openalex.org/W3025624935","https://openalex.org/W3030163527","https://openalex.org/W3037082750","https://openalex.org/W3081758821","https://openalex.org/W3082274269","https://openalex.org/W3082424964","https://openalex.org/W3095701222","https://openalex.org/W3099161928","https://openalex.org/W3113580345","https://openalex.org/W3113624456","https://openalex.org/W3119746452","https://openalex.org/W3133702157","https://openalex.org/W3147162460","https://openalex.org/W3196747654","https://openalex.org/W4205922070"],"related_works":["https://openalex.org/W3163913310","https://openalex.org/W3093341354","https://openalex.org/W3177052505","https://openalex.org/W3174481949","https://openalex.org/W3061846580","https://openalex.org/W2407617264","https://openalex.org/W2175997803","https://openalex.org/W3034044328","https://openalex.org/W3153273959","https://openalex.org/W23177027","https://openalex.org/W2613164689","https://openalex.org/W3030600301","https://openalex.org/W2112276485","https://openalex.org/W2282115141","https://openalex.org/W2217262223","https://openalex.org/W3016988636","https://openalex.org/W2795424778","https://openalex.org/W3022961397","https://openalex.org/W1565088482","https://openalex.org/W2326522764"],"abstract_inverted_index":{"The":[0,126],"success":[1],"of":[2,72,93,128,151],"deep":[3],"learning":[4],"has":[5],"sparked":[6],"interest":[7],"in":[8,113],"improving":[9],"relational":[10,63,74],"table":[11,19,25,28,107,111],"tasks,":[12],"like":[13],"data":[14,170],"preparation":[15],"and":[16,47,100,120,124,165,172,178],"search,":[17,171],"with":[18,59,115,144],"representation":[20],"models":[21,50],"trained":[22],"on":[23,132,142],"large":[24],"corpora.":[26,108],"Existing":[27],"corpora":[29],"primarily":[30],"contain":[31],"tables":[32,60,75],"extracted":[33,76],"from":[34,77,105,122],"HTML":[35],"pages,":[36],"limiting":[37],"the":[38,54,85,133,176],"capability":[39],"to":[40,87],"represent":[41],"offline":[42],"database":[43,64],"tables.":[44,65,91],"To":[45],"train":[46],"evaluate":[48],"high-capacity":[49],"for":[51,156,167],"applications":[52,150],"beyond":[53],"Web,":[55],"we":[56,67],"need":[57],"resources":[58],"that":[61,96,137],"resemble":[62],"Here":[66],"introduce":[68],"GitTables,":[69,152],"a":[70],"corpus":[71,86,177],"1M":[73],"GitHub.":[78],"Our":[79],"continuing":[80],"curation":[81],"aims":[82],"at":[83,88,181],"growing":[84],"least":[89],"10M":[90],"Analyses":[92],"GitTables":[94,114],"show":[95],"its":[97,154],"structure,":[98],"content,":[99],"topical":[101],"coverage":[102],"differ":[103],"significantly":[104],"existing":[106],"We":[109,147,174],"annotate":[110],"columns":[112],"semantic":[116,158],"types,":[117],"hierarchical":[118],"relations":[119],"descriptions":[121],"Schema.org":[123],"DBpedia.":[125],"evaluation":[127],"our":[129,138],"annotation":[130],"pipeline":[131],"T2Dv2":[134],"benchmark":[135],"illustrates":[136],"approach":[139],"provides":[140],"results":[141],"par":[143],"human":[145],"annotations.":[146],"present":[148],"three":[149],"demonstrating":[153],"value":[155],"learned":[157],"type":[159],"detection":[160],"models,":[161],"schema":[162],"completion":[163],"methods,":[164],"benchmarks":[166],"table-to-KG":[168],"matching,":[169],"preparation.":[173],"make":[175],"code":[179],"available":[180],"https://gittables.github.io.":[182]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
