{"id":"https://openalex.org/W4386269375","doi":"https://doi.org/10.48550/arxiv.2308.13560","title":"Open Government Data Corpus for Table Search","display_name":"Open Government Data Corpus for Table Search","publication_year":2023,"publication_date":"2023-08-24","ids":{"openalex":"https://openalex.org/W4386269375","doi":"https://doi.org/10.48550/arxiv.2308.13560"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2308.13560","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.13560","pdf_url":"https://arxiv.org/pdf/2308.13560","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2308.13560","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079135719","display_name":"Michael Gla\u00df","orcid":"https://orcid.org/0000-0002-8006-8843"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Glass, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075759822","display_name":"Sugato Bagchi","orcid":"https://orcid.org/0009-0005-1173-600X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bagchi, Sugato","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068065546","display_name":"Oktie Hassanzadeh","orcid":"https://orcid.org/0000-0001-5307-9857"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hassanzadeh, Oktie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059818259","display_name":"Gaetano Rossiello","orcid":"https://orcid.org/0000-0003-1042-4782"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rossiello, Gaetano","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5087419620","display_name":"Alfio Gliozzo","orcid":"https://orcid.org/0000-0002-8044-2911"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gliozzo, Alfio","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5079135719"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9828000068664551,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8053663969039917},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.7470116019248962},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7323917150497437},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6707409620285034},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5684553384780884},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.45704275369644165},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4515015184879303},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.42413803935050964},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.26441270112991333}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8053663969039917},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.7470116019248962},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7323917150497437},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6707409620285034},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5684553384780884},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.45704275369644165},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4515015184879303},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.42413803935050964},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.26441270112991333},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2308.13560","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.13560","pdf_url":"https://arxiv.org/pdf/2308.13560","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2308.13560","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2308.13560","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2308.13560","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.13560","pdf_url":"https://arxiv.org/pdf/2308.13560","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4386269375.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2392768766","https://openalex.org/W2058118494","https://openalex.org/W2382021449","https://openalex.org/W2095118173","https://openalex.org/W2104269053","https://openalex.org/W2106424170","https://openalex.org/W1985426483","https://openalex.org/W2501188010","https://openalex.org/W4299935056","https://openalex.org/W2010935248"],"abstract_inverted_index":{"Increasing":[0],"amounts":[1],"of":[2,103,111,125,135,153],"structured":[3],"data":[4,15,21,25,34,69,82,95],"can":[5,16],"provide":[6,161],"value":[7],"for":[8,56],"research":[9,41],"and":[10,54,74,129,171],"business":[11],"if":[12],"the":[13,20,88,118,126,138,146,150],"relevant":[14],"be":[17],"located.":[18],"Often":[19],"is":[22,38],"in":[23,58,71,137],"a":[24,28,39,59,132,154],"lake":[26],"without":[27],"consistent":[29],"schema,":[30],"making":[31],"locating":[32],"useful":[33],"challenging.":[35],"Table":[36],"search":[37,93,156],"growing":[40],"area,":[42],"but":[43],"existing":[44,167],"benchmarks":[45],"have":[46],"been":[47],"limited":[48],"to":[49,91],"displayed":[50],"tables.":[51],"Tables":[52],"sized":[53],"formatted":[55],"display":[57],"Wikipedia":[60],"page":[61],"or":[62],"ArXiv":[63],"paper":[64],"are":[65],"considerably":[66],"different":[67],"from":[68,83],"tables":[70,96,115,121,130,158],"both":[72],"scale":[73],"style.":[75],"By":[76],"using":[77,166],"metadata":[78,147],"associated":[79],"with":[80,131,145],"open":[81],"government":[84],"portals,":[85],"we":[86],"create":[87],"first":[89],"dataset":[90,165],"benchmark":[92],"over":[94,157],"at":[97],"scale.":[98],"We":[99,160],"demonstrate":[100],"three":[101,109],"styles":[102],"table-to-table":[104],"related":[105],"table":[106,112],"search.":[107],"The":[108,141],"notions":[110],"relatedness":[113],"are:":[114],"produced":[116],"by":[117],"same":[119,127],"organization,":[120],"distributed":[122],"as":[123],"part":[124],"dataset,":[128],"high":[133],"degree":[134],"overlap":[136],"annotated":[139],"tags.":[140],"keyword":[142,155],"tags":[143],"provided":[144],"also":[148],"permit":[149],"automatic":[151],"creation":[152],"benchmark.":[159],"baselines":[162],"on":[163],"this":[164],"methods":[168],"including":[169],"traditional":[170],"neural":[172],"approaches.":[173]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
