{"id":"https://openalex.org/W4411374224","doi":"https://doi.org/10.1145/3722212.3725104","title":"Finding What You're Looking For: A Distribution-Aware Dataset Search Engine in Action","display_name":"Finding What You're Looking For: A Distribution-Aware Dataset Search Engine in Action","publication_year":2025,"publication_date":"2025-06-17","ids":{"openalex":"https://openalex.org/W4411374224","doi":"https://doi.org/10.1145/3722212.3725104"},"language":"en","primary_location":{"id":"doi:10.1145/3722212.3725104","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3722212.3725104","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the 2025 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3722212.3725104","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092545429","display_name":"Lennart Behme","orcid":null},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Lennart Behme","raw_affiliation_strings":["BIFOLD, Berlin, Germany and Technische Universit\u00e4t Berlin, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"BIFOLD, Berlin, Germany and Technische Universit\u00e4t Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5118273579","display_name":"Leonard Gei\u00dfler","orcid":null},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Leonard Gei\u00dfler","raw_affiliation_strings":["Technische Universit\u00e4t Berlin, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5118325325","display_name":"Pratham Agrawal","orcid":null},"institutions":[{"id":"https://openalex.org/I68891433","display_name":"Indian Institute of Technology Delhi","ror":"https://ror.org/049tgcd06","country_code":"IN","type":"education","lineage":["https://openalex.org/I68891433"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Pratham Agrawal","raw_affiliation_strings":["Indian Institute of Technology Delhi, Delhi, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Delhi, Delhi, India","institution_ids":["https://openalex.org/I68891433"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5118273580","display_name":"Emil Badura","orcid":null},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Emil Badura","raw_affiliation_strings":["Technische Universit\u00e4t Berlin, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5118273581","display_name":"Benjamin Ueber","orcid":null},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Benjamin Ueber","raw_affiliation_strings":["Technische Universit\u00e4t Berlin, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007094287","display_name":"Kaustubh Beedkar","orcid":"https://orcid.org/0009-0006-2322-4527"},"institutions":[{"id":"https://openalex.org/I68891433","display_name":"Indian Institute of Technology Delhi","ror":"https://ror.org/049tgcd06","country_code":"IN","type":"education","lineage":["https://openalex.org/I68891433"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Kaustubh Beedkar","raw_affiliation_strings":["Indian Institute of Technology Delhi, Delhi, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Delhi, Delhi, India","institution_ids":["https://openalex.org/I68891433"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002413906","display_name":"Volker Markl","orcid":"https://orcid.org/0009-0009-0964-026X"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Volker Markl","raw_affiliation_strings":["BIFOLD, Berlin, Germany, Technische Universit\u00e4t Berlin, Berlin, Germany, and DFKI, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"BIFOLD, Berlin, Germany, Technische Universit\u00e4t Berlin, Berlin, Germany, and DFKI, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5092545429"],"corresponding_institution_ids":["https://openalex.org/I4577782"],"apc_list":null,"apc_paid":null,"fwci":3.2508,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.91805083,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"39","last_page":"42"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10742","display_name":"Peer-to-Peer Network Technologies","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6791541576385498},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5261449813842773},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.4156843423843384},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.28901511430740356},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08135524392127991}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6791541576385498},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5261449813842773},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.4156843423843384},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.28901511430740356},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08135524392127991},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3722212.3725104","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3722212.3725104","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the 2025 International Conference on Management of Data","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3722212.3725104","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3722212.3725104","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the 2025 International Conference on Management of Data","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W2795089200","https://openalex.org/W2926805670","https://openalex.org/W2963469388","https://openalex.org/W3196904276","https://openalex.org/W4309505014","https://openalex.org/W4309505042","https://openalex.org/W4312450986","https://openalex.org/W4387390023","https://openalex.org/W4393336166","https://openalex.org/W4399117321","https://openalex.org/W4399760152","https://openalex.org/W4402042501"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0],"growing":[1],"volume":[2],"of":[3,29,40,124],"academic,":[4],"commercial,":[5],"and":[6,100,129,141],"governmental":[7],"datasets":[8,126],"distributed":[9],"across":[10],"countless":[11],"independent":[12],"repositories":[13],"calls":[14],"for":[15,68],"dataset":[16,70],"search":[17,38,43,71,80,109,140],"engines":[18,44],"that":[19],"can":[20],"answer":[21],"queries":[22],"only":[23],"using":[24],"publicly":[25],"shared":[26],"metadata":[27],"instead":[28],"relying":[30],"on":[31,94],"raw":[32,113],"data":[33,74],"access.":[34],"However,":[35],"the":[36,64,104,142],"keyword":[37],"interfaces":[39],"existing":[41,79],"metadata-based":[42],"fail":[45],"to":[46,87,112,137],"capture":[47],"complex":[48],"user":[49],"needs,":[50],"such":[51],"as":[52],"distributional":[53],"requirements,":[54],"thereby":[55],"limiting":[56],"their":[57],"effectiveness.":[58],"In":[59],"this":[60],"demonstration,":[61],"we":[62,119],"present":[63],"first":[65],"end-to-end":[66],"system":[67,105,146],"distribution-aware":[69,102,139],"over":[72,121],"decentralized":[73],"repositories.":[75],"Our":[76],"prototype":[77],"combines":[78],"techniques":[81],"with":[82,132],"recently":[83],"proposed":[84],"percentile":[85],"predicates":[86],"provide":[88],"more":[89],"powerful":[90],"query":[91],"capabilities.":[92],"Based":[93],"our":[95],"novel":[96],"Dataset":[97],"Query":[98],"Language":[99],"a":[101],"index,":[103],"enables":[106],"efficient,":[107],"flexible":[108],"without":[110],"access":[111],"data.":[114],"To":[115],"demonstrate":[116],"its":[117],"utility,":[118],"curated":[120],"150,000":[122],"profiles":[123],"tabular":[125],"from":[127],"Kaggle":[128],"enriched":[130],"them":[131],"statistical":[133],"information,":[134],"enabling":[135],"attendees":[136],"explore":[138],"trade-offs":[143],"involved":[144],"in":[145],"configuration.":[147]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
