{"id":"https://openalex.org/W4400532462","doi":"https://doi.org/10.1145/3626772.3657877","title":"A Large Scale Test Corpus for Semantic Table Search","display_name":"A Large Scale Test Corpus for Semantic Table Search","publication_year":2024,"publication_date":"2024-07-10","ids":{"openalex":"https://openalex.org/W4400532462","doi":"https://doi.org/10.1145/3626772.3657877"},"language":"en","primary_location":{"id":"doi:10.1145/3626772.3657877","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626772.3657877","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3626772.3657877","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061214307","display_name":"Aristotelis Leventidis","orcid":"https://orcid.org/0000-0002-7229-3936"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Aristotelis Leventidis","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"raw_orcid":"https://orcid.org/0000-0002-7229-3936","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101399593","display_name":"Martin Pek\u00e1r Christensen","orcid":"https://orcid.org/0000-0003-3168-6810"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Martin Pek\u00e1r Christensen","raw_affiliation_strings":["Aalborg University, Aalborg, Denmark"],"raw_orcid":"https://orcid.org/0000-0003-3168-6810","affiliations":[{"raw_affiliation_string":"Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084898176","display_name":"Matteo Lissandrini","orcid":null},"institutions":[{"id":"https://openalex.org/I119439378","display_name":"University of Verona","ror":"https://ror.org/039bp8j42","country_code":"IT","type":"education","lineage":["https://openalex.org/I119439378"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Matteo Lissandrini","raw_affiliation_strings":["University of Verona &amp; Aalborg University, Verona, Italy"],"raw_orcid":"https://orcid.org/0000-0001-7922-5998","affiliations":[{"raw_affiliation_string":"University of Verona &amp; Aalborg University, Verona, Italy","institution_ids":["https://openalex.org/I119439378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086097686","display_name":"Laura Rocco","orcid":"https://orcid.org/0000-0002-8134-909X"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Laura Di Rocco","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"raw_orcid":"https://orcid.org/0000-0002-8134-909X","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015313855","display_name":"Katja Hose","orcid":"https://orcid.org/0000-0001-7025-8099"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Katja Hose","raw_affiliation_strings":["Technische Universit\u00e4t Wien &amp; Aalborg University, Vienna, Austria"],"raw_orcid":"https://orcid.org/0000-0001-7025-8099","affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Wien &amp; Aalborg University, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022619313","display_name":"Ren\u00e9e J. Miller","orcid":"https://orcid.org/0000-0002-1484-4787"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ren\u00e9e J. Miller","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"raw_orcid":"https://orcid.org/0000-0002-1484-4787","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5061214307"],"corresponding_institution_ids":["https://openalex.org/I12912129"],"apc_list":null,"apc_paid":null,"fwci":0.9386,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.77066958,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1142","last_page":"1151"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7167305946350098},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.5967512726783752},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.5782251358032227},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5517327189445496},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5010128021240234},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45882654190063477},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4532249867916107},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.19445595145225525},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.1029096245765686}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7167305946350098},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.5967512726783752},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.5782251358032227},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5517327189445496},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5010128021240234},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45882654190063477},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4532249867916107},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.19445595145225525},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.1029096245765686},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3626772.3657877","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626772.3657877","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:publications/2ac49c43-75fc-49c2-be4c-eac50cee4459","is_oa":true,"landing_page_url":"https://vbn.aau.dk/da/publications/2ac49c43-75fc-49c2-be4c-eac50cee4459","pdf_url":"https://vbn.aau.dk/ws/files/755219302/3626772.3657877.pdf","source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Leventidis, A, Christensen, M P, Lissandrini, M, Di Rocco, L, Hose, K & Miller, R J 2024, A Large Scale Test Corpus for Semantic Table Search. in SIGIR 2024 - Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval. 46 edn, Association for Computing Machinery (ACM), New York, USA, pp. 1142-1151, SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval, Washington, United States, 14/07/2024. https://doi.org/10.1145/3626772.3657877","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1145/3626772.3657877","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626772.3657877","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.44999998807907104,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1537211332","display_name":null,"funder_award_id":"IIS-1956096","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4858474724","display_name":null,"funder_award_id":"DFF-8048-00051B","funder_id":"https://openalex.org/F4320322928","funder_display_name":"Danmarks Frie Forskningsfond"},{"id":"https://openalex.org/G601730711","display_name":"III : Medium: Collaborative Research: From Open Data to Open Data Curation","funder_award_id":"2107248","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6745713617","display_name":null,"funder_award_id":"1956096","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7019881830","display_name":null,"funder_award_id":"IIS-2107248,IIS-1956096,IIS-2325632","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G7243102137","display_name":null,"funder_award_id":"838216","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G8647820856","display_name":"III: Small: Semantic Version Management in Data Lakes","funder_award_id":"2325632","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320322928","display_name":"Danmarks Frie Forskningsfond","ror":"https://ror.org/02sptwz63"},{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"},{"id":"https://openalex.org/F4320330079","display_name":"Poul Due Jensens Fond","ror":null},{"id":"https://openalex.org/F4320332999","display_name":"Horizon 2020 Framework Programme","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1721994796","https://openalex.org/W1969621019","https://openalex.org/W1985514943","https://openalex.org/W2111869785","https://openalex.org/W2128878007","https://openalex.org/W2341748398","https://openalex.org/W2444650685","https://openalex.org/W2583976214","https://openalex.org/W2610871042","https://openalex.org/W2750991217","https://openalex.org/W2788550262","https://openalex.org/W2795089200","https://openalex.org/W2798664493","https://openalex.org/W2889003264","https://openalex.org/W2948163032","https://openalex.org/W2963174348","https://openalex.org/W2967030753","https://openalex.org/W2969723769","https://openalex.org/W2970992672","https://openalex.org/W3008881932","https://openalex.org/W3014616325","https://openalex.org/W3032215537","https://openalex.org/W3037852608","https://openalex.org/W3094328550","https://openalex.org/W3099839495","https://openalex.org/W3153273959","https://openalex.org/W3154140311","https://openalex.org/W3161215821","https://openalex.org/W3174637548","https://openalex.org/W4211158016","https://openalex.org/W4221009220","https://openalex.org/W4252076394","https://openalex.org/W4365456672","https://openalex.org/W4375928372","https://openalex.org/W4380433117","https://openalex.org/W4390636334","https://openalex.org/W4393512619"],"related_works":["https://openalex.org/W4394360958","https://openalex.org/W2948670949","https://openalex.org/W4288047943","https://openalex.org/W4394193569","https://openalex.org/W1797990060","https://openalex.org/W4232484699","https://openalex.org/W2473636215","https://openalex.org/W4242025311","https://openalex.org/W2990655940","https://openalex.org/W2478535484"],"abstract_inverted_index":{"Table":[0,97],"search":[1,38,82,155,255],"aims":[2],"to":[3,30,44,76,85,207],"answer":[4],"a":[5,8,25,91,121,132,141,153,225,244],"query":[6,34],"with":[7,116,237,261],"ranked":[9],"list":[10],"of":[11,103,106,144,164,188],"tables.":[12,197],"Unfortunately,":[13],"current":[14],"test":[15,77],"corpora":[16],"have":[17],"focused":[18],"mostly":[19],"on":[20,181],"needle-in-the-haystack":[21],"tasks,":[22],"where":[23],"only":[24],"few":[26],"tables":[27,110,175,218],"are":[28,71],"expected":[29],"exactly":[31],"match":[32],"the":[33,45,107,177,182,185,202,238],"intent.":[35],"Instead,":[36],"table":[37,67,81,254],"tasks":[39],"often":[40],"arise":[41],"in":[42,73],"response":[43],"need":[46],"for":[47,56,80,94],"retrieving":[48],"new":[49],"datasets":[50],"or":[51,62],"augmenting":[52],"existing":[53,150,221,253],"ones,":[54],"e.g.,":[55],"data":[57,60],"augmentation":[58],"within":[59],"science":[61],"machine":[63],"learning":[64],"pipelines.":[65],"Existing":[66],"repositories":[68],"and":[69,114,123,130,172,194,213,231],"benchmarks":[70],"limited":[72],"their":[74,258],"ability":[75],"retrieval":[78],"methods":[79,151],"tasks.":[83],"Thus,":[84],"close":[86],"this":[87],"gap,":[88],"we":[89,139,246],"introduce":[90],"novel":[92,100,142,154,166,263],"dataset":[93,101],"query-by-example":[95],"Semantic":[96],"Search.":[98],"This":[99,198],"consists":[102,163,187],"two":[104,117],"snapshots":[105],"large-scale":[108,133],"Wikipedia":[109],"collection":[111],"from":[112,176],"2013":[113,178],"2019":[115,183],"important":[118],"additions:":[119],"(1)":[120],"page":[122],"topic":[124],"aware":[125],"ground":[126],"truth":[127],"relevance":[128,170,192,241],"judgment":[129],"(2)":[131],"DBpedia":[134],"entity":[135],"linking":[136],"annotation.":[137],"Moreover,":[138],"generate":[140],"set":[143],"entity-centric":[145],"queries":[146,212],"that":[147,233],"allows":[148],"testing":[149],"under":[152],"scenario:":[156],"semantic":[157],"exploratory":[158],"search.":[159],"The":[160],"resulting":[161],"resource":[162,186,201],"9,296":[165],"queries,":[167,190],"610,553":[168],"query-table":[169],"annotations,":[171,193],"238,038":[173],"entity-linked":[174],"snapshot.":[179],"Similarly,":[180],"snapshot,":[184],"2,560":[189],"958,214":[191],"457,714":[195],"total":[196],"makes":[199],"our":[200],"largest":[203],"annotated":[204,217],"table-search":[205],"corpus":[206],"date":[208],"(97":[209],"times":[210,215],"more":[211,216],"956":[214],"than":[219],"any":[220],"benchmark).":[222],"We":[223],"perform":[224],"user":[226],"study":[227],"among":[228],"domain":[229],"experts":[230],"prove":[232],"these":[234],"annotators":[235],"agree":[236],"automatically":[239],"generated":[240],"annotations.":[242],"As":[243],"result,":[245],"can":[247],"re-evaluate":[248],"some":[249],"basic":[250],"assumptions":[251],"behind":[252],"approaches":[256],"identifying":[257],"shortcomings":[259],"along":[260],"promising":[262],"research":[264],"directions.":[265]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
