{"id":"https://openalex.org/W2926805670","doi":"https://doi.org/10.1145/3308558.3313685","title":"Google Dataset Search: Building a search engine for datasets in an open Web ecosystem","display_name":"Google Dataset Search: Building a search engine for datasets in an open Web ecosystem","publication_year":2019,"publication_date":"2019-05-13","ids":{"openalex":"https://openalex.org/W2926805670","doi":"https://doi.org/10.1145/3308558.3313685","mag":"2926805670"},"language":"en","primary_location":{"id":"doi:10.1145/3308558.3313685","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3308558.3313685","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The World Wide Web Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3308558.3313685","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043459631","display_name":"Dan Brickley","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dan Brickley","raw_affiliation_strings":["Google, USA"],"affiliations":[{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104622126","display_name":"Matthew Burgess","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew Burgess","raw_affiliation_strings":["Google, USA"],"affiliations":[{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041421536","display_name":"Natasha Noy","orcid":"https://orcid.org/0000-0002-7437-0624"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Natasha Noy","raw_affiliation_strings":["Google, USA"],"affiliations":[{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5043459631"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":44.158,"has_fulltext":false,"cited_by_count":276,"citation_normalized_percentile":{"value":0.99803712,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1365","last_page":"1375"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7347419261932373},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.7121078372001648},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.6144587993621826},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5628528594970703},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5545908212661743},{"id":"https://openalex.org/keywords/search-analytics","display_name":"Search analytics","score":0.5387822389602661},{"id":"https://openalex.org/keywords/spamdexing","display_name":"Spamdexing","score":0.5198357701301575},{"id":"https://openalex.org/keywords/ecosystem","display_name":"Ecosystem","score":0.4667015075683594},{"id":"https://openalex.org/keywords/web-search-engine","display_name":"Web search engine","score":0.4525352120399475},{"id":"https://openalex.org/keywords/semantic-search","display_name":"Semantic search","score":0.44587942957878113},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.4431836009025574},{"id":"https://openalex.org/keywords/deep-web","display_name":"Deep Web","score":0.4210556447505951},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.21688300371170044},{"id":"https://openalex.org/keywords/ecology","display_name":"Ecology","score":0.10195446014404297},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.06599059700965881}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7347419261932373},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.7121078372001648},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.6144587993621826},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5628528594970703},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5545908212661743},{"id":"https://openalex.org/C14838553","wikidata":"https://www.wikidata.org/wiki/Q7441639","display_name":"Search analytics","level":4,"score":0.5387822389602661},{"id":"https://openalex.org/C13565553","wikidata":"https://www.wikidata.org/wiki/Q804206","display_name":"Spamdexing","level":5,"score":0.5198357701301575},{"id":"https://openalex.org/C110872660","wikidata":"https://www.wikidata.org/wiki/Q37813","display_name":"Ecosystem","level":2,"score":0.4667015075683594},{"id":"https://openalex.org/C521815418","wikidata":"https://www.wikidata.org/wiki/Q4182287","display_name":"Web search engine","level":4,"score":0.4525352120399475},{"id":"https://openalex.org/C166423231","wikidata":"https://www.wikidata.org/wiki/Q1891170","display_name":"Semantic search","level":3,"score":0.44587942957878113},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.4431836009025574},{"id":"https://openalex.org/C46721378","wikidata":"https://www.wikidata.org/wiki/Q221989","display_name":"Deep Web","level":3,"score":0.4210556447505951},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.21688300371170044},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.10195446014404297},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.06599059700965881}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3308558.3313685","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3308558.3313685","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The World Wide Web Conference","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3308558.3313685","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3308558.3313685","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The World Wide Web Conference","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.5899999737739563,"display_name":"Climate action","id":"https://metadata.un.org/sdg/13"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1506705062","https://openalex.org/W1942169943","https://openalex.org/W1966978420","https://openalex.org/W2053061798","https://openalex.org/W2127978399","https://openalex.org/W2148117599","https://openalex.org/W2148738951","https://openalex.org/W2167571757","https://openalex.org/W2196674927","https://openalex.org/W2197236641","https://openalex.org/W2438792749","https://openalex.org/W2562439314","https://openalex.org/W2574540012","https://openalex.org/W2597065118","https://openalex.org/W2610332888","https://openalex.org/W2613400518","https://openalex.org/W2613628460","https://openalex.org/W2619871029","https://openalex.org/W2767616304","https://openalex.org/W2950031835","https://openalex.org/W2950860947","https://openalex.org/W6737946431"],"related_works":["https://openalex.org/W2188553426","https://openalex.org/W2419328283","https://openalex.org/W2389041382","https://openalex.org/W2964491004","https://openalex.org/W2102207113","https://openalex.org/W2165096741","https://openalex.org/W2223855511","https://openalex.org/W2336826532","https://openalex.org/W4297963434","https://openalex.org/W2188884906"],"abstract_inverted_index":{"There":[0],"are":[1],"thousands":[2],"of":[3,13,56,141,158],"data":[4,24,29,50,68],"repositories":[5],"on":[6,63,97,103,116],"the":[7,98,138,142,161],"Web,":[8],"providing":[9,67,128],"access":[10,71],"to":[11,36,39,42,48,53,61,72],"millions":[12],"datasets.":[14],"National":[15],"and":[16,21,26,45,66,74,110,124,151,160],"regional":[17],"governments,":[18],"scientific":[19],"publishers":[20],"consortia,":[22],"commercial":[23],"providers,":[25],"others":[27],"publish":[28,112],"for":[30],"fields":[31],"ranging":[32],"from":[33,166],"social":[34,150],"science":[35,38,44],"life":[37],"high-energy":[40],"physics":[41],"climate":[43],"more.":[46],"Access":[47],"this":[49,78,126,145,156,167],"is":[51],"critical":[52],"facilitating":[54],"reproducibility":[55],"research":[57],"results,":[58],"enabling":[59],"scientists":[60],"build":[62],"others'":[64],"work,":[65],"journalists":[69],"easier":[70],"information":[73],"its":[75],"provenance.":[76],"In":[77,144],"paper,":[79,146],"we":[80,147,164],"discuss":[81,148],"Google":[82],"Dataset":[83],"Search,":[84],"a":[85,129],"dataset-discovery":[86],"tool":[87],"that":[88,132,163],"provides":[89],"search":[90,130],"capabilities":[91],"over":[92],"potentially":[93],"all":[94],"datasets":[95,136],"published":[96],"Web.":[99,143],"The":[100],"approach":[101],"relies":[102],"an":[104],"open":[105],"ecosystem,":[106],"where":[107],"dataset":[108],"owners":[109],"providers":[111],"semantically":[113],"enhanced":[114],"metadata":[115],"their":[117],"own":[118],"sites.":[119],"We":[120],"then":[121],"aggregate,":[122],"normalize,":[123],"reconcile":[125],"metadata,":[127],"engine":[131],"lets":[133],"users":[134],"find":[135],"in":[137,154],"\u201clong":[139],"tail\u201d":[140],"both":[149],"technical":[152],"challenges":[153],"building":[155],"type":[157],"tool,":[159],"lessons":[162],"learned":[165],"experience.":[168]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":38},{"year":2024,"cited_by_count":40},{"year":2023,"cited_by_count":63},{"year":2022,"cited_by_count":42},{"year":2021,"cited_by_count":47},{"year":2020,"cited_by_count":30},{"year":2019,"cited_by_count":11},{"year":2018,"cited_by_count":1}],"updated_date":"2026-03-01T08:55:55.761014","created_date":"2025-10-10T00:00:00"}
