{"id":"https://openalex.org/W4205196823","doi":"https://doi.org/10.1109/bigdata52589.2021.9671318","title":"LDI: Learned Distribution Index for Column Stores","display_name":"LDI: Learned Distribution Index for Column Stores","publication_year":2021,"publication_date":"2021-12-15","ids":{"openalex":"https://openalex.org/W4205196823","doi":"https://doi.org/10.1109/bigdata52589.2021.9671318"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata52589.2021.9671318","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671318","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038763232","display_name":"Dai Hai Ton That","orcid":"https://orcid.org/0000-0002-3935-2471"},"institutions":[{"id":"https://openalex.org/I82495205","display_name":"University of Alabama in Huntsville","ror":"https://ror.org/02zsxwr40","country_code":"US","type":"education","lineage":["https://openalex.org/I82495205"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dai-Hai Ton That","raw_affiliation_strings":["University of Alabama, Huntsville"],"affiliations":[{"raw_affiliation_string":"University of Alabama, Huntsville","institution_ids":["https://openalex.org/I82495205"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018833411","display_name":"Mohammadsaleh Gharehdaghi","orcid":null},"institutions":[{"id":"https://openalex.org/I118353179","display_name":"DePaul University","ror":"https://ror.org/04xtx5t16","country_code":"US","type":"education","lineage":["https://openalex.org/I118353179"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohammadsaleh Gharehdaghi","raw_affiliation_strings":["DePaul University"],"affiliations":[{"raw_affiliation_string":"DePaul University","institution_ids":["https://openalex.org/I118353179"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007690835","display_name":"Alexander Rasin","orcid":"https://orcid.org/0000-0001-7282-5763"},"institutions":[{"id":"https://openalex.org/I118353179","display_name":"DePaul University","ror":"https://ror.org/04xtx5t16","country_code":"US","type":"education","lineage":["https://openalex.org/I118353179"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Rasin","raw_affiliation_strings":["DePaul University"],"affiliations":[{"raw_affiliation_string":"DePaul University","institution_ids":["https://openalex.org/I118353179"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071436717","display_name":"Tanu Malik","orcid":"https://orcid.org/0009-0007-9656-727X"},"institutions":[{"id":"https://openalex.org/I118353179","display_name":"DePaul University","ror":"https://ror.org/04xtx5t16","country_code":"US","type":"education","lineage":["https://openalex.org/I118353179"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tanu Malik","raw_affiliation_strings":["DePaul University"],"affiliations":[{"raw_affiliation_string":"DePaul University","institution_ids":["https://openalex.org/I118353179"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5038763232"],"corresponding_institution_ids":["https://openalex.org/I82495205"],"apc_list":null,"apc_paid":null,"fwci":0.616,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.56407066,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"376","last_page":"387"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/merge","display_name":"Merge (version control)","score":0.8243184685707092},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7656047940254211},{"id":"https://openalex.org/keywords/column","display_name":"Column (typography)","score":0.7154473066329956},{"id":"https://openalex.org/keywords/sort","display_name":"sort","score":0.6717530488967896},{"id":"https://openalex.org/keywords/merge-algorithm","display_name":"Merge algorithm","score":0.6494166851043701},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5413603186607361},{"id":"https://openalex.org/keywords/merge-sort","display_name":"Merge sort","score":0.5094054341316223},{"id":"https://openalex.org/keywords/sorting","display_name":"Sorting","score":0.4804060757160187},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.32982710003852844},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.31014055013656616},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.236321359872818},{"id":"https://openalex.org/keywords/sorting-algorithm","display_name":"Sorting algorithm","score":0.20862305164337158},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.08786776661872864}],"concepts":[{"id":"https://openalex.org/C197129107","wikidata":"https://www.wikidata.org/wiki/Q1921621","display_name":"Merge (version control)","level":2,"score":0.8243184685707092},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7656047940254211},{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.7154473066329956},{"id":"https://openalex.org/C88548561","wikidata":"https://www.wikidata.org/wiki/Q347599","display_name":"sort","level":2,"score":0.6717530488967896},{"id":"https://openalex.org/C140086265","wikidata":"https://www.wikidata.org/wiki/Q11341754","display_name":"Merge algorithm","level":4,"score":0.6494166851043701},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5413603186607361},{"id":"https://openalex.org/C35555965","wikidata":"https://www.wikidata.org/wiki/Q189057","display_name":"Merge sort","level":4,"score":0.5094054341316223},{"id":"https://openalex.org/C111696304","wikidata":"https://www.wikidata.org/wiki/Q2303697","display_name":"Sorting","level":2,"score":0.4804060757160187},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32982710003852844},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.31014055013656616},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.236321359872818},{"id":"https://openalex.org/C108094655","wikidata":"https://www.wikidata.org/wiki/Q181593","display_name":"Sorting algorithm","level":3,"score":0.20862305164337158},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.08786776661872864},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata52589.2021.9671318","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671318","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1507039213","https://openalex.org/W1521148341","https://openalex.org/W1923473247","https://openalex.org/W1967601791","https://openalex.org/W1969877208","https://openalex.org/W1992023276","https://openalex.org/W2002128741","https://openalex.org/W2006296837","https://openalex.org/W2020463726","https://openalex.org/W2035735180","https://openalex.org/W2068739275","https://openalex.org/W2082491156","https://openalex.org/W2084061469","https://openalex.org/W2087946700","https://openalex.org/W2102987499","https://openalex.org/W2147232895","https://openalex.org/W2151310484","https://openalex.org/W2154809801","https://openalex.org/W2158347038","https://openalex.org/W2426624872","https://openalex.org/W2605800201","https://openalex.org/W2620824566","https://openalex.org/W2624304035","https://openalex.org/W2798441769","https://openalex.org/W2889258430","https://openalex.org/W2895533467","https://openalex.org/W3138367763","https://openalex.org/W3186477959","https://openalex.org/W6631347555","https://openalex.org/W6638231387","https://openalex.org/W6675553691","https://openalex.org/W6682033973","https://openalex.org/W6738874634","https://openalex.org/W6739233426","https://openalex.org/W6755420604"],"related_works":["https://openalex.org/W2977813765","https://openalex.org/W1989067202","https://openalex.org/W1997715509","https://openalex.org/W1822668896","https://openalex.org/W2074050947","https://openalex.org/W2165167056","https://openalex.org/W2102143009","https://openalex.org/W1852882392","https://openalex.org/W3038595913","https://openalex.org/W2356015003"],"abstract_inverted_index":{"In":[0,51],"column":[1,11,17,36,70,80],"stores,":[2],"which":[3,43],"ingest":[4,27],"large":[5],"amounts":[6],"of":[7,93,121,146],"data":[8,28,66,86,94],"into":[9],"multiple":[10],"groups,":[12],"query":[13],"performance":[14],"deteriorates.":[15],"Commercial":[16],"stores":[18,37,157],"use":[19],"log-structured":[20],"merge":[21],"(LSM)":[22],"tree":[23],"on":[24,96],"projections":[25],"to":[26,56,104,110,116,133,137],"rapidly.":[29],"LSM":[30,153],"improves":[31],"ingestion":[32],"performance,":[33],"but":[34],"in":[35,69],"the":[38,58,97,105,118,125],"sort-merge":[39,127],"phase":[40],"is":[41,67],"I/O-intensive,":[42],"slows":[44],"concurrent":[45],"queries":[46],"and":[47,60,88,135,139,154,160],"reduces":[48],"overall":[49],"throughput.":[50],"this":[52],"paper,":[53],"we":[54],"aim":[55],"reduce":[57],"sorting":[59],"merging":[61],"cost":[62],"that":[63,102,143],"arise":[64],"when":[65],"ingested":[68],"stores.":[71,81],"We":[72,129,149],"present":[73,130],"LDI,":[74],"a":[75,84,90,140],"learned":[76,98],"distribution":[77,87,106],"index":[78],"for":[79],"LDI":[82,151],"learns":[83],"frequency-based":[85],"constructs":[89],"bucket":[91],"worth":[92],"based":[95],"distribution.":[99],"Filled":[100],"buckets":[101,113],"conform":[103],"are":[107,114],"written":[108],"out":[109],"disk;":[111],"unfilled":[112],"retained":[115],"achieve":[117],"desired":[119],"level":[120],"sortedness,":[122],"thus":[123],"avoiding":[124],"expensive":[126],"phase.":[128],"an":[131],"algorithm":[132],"learn":[134],"adapt":[136],"distributions,":[138],"robust":[141],"implementation":[142],"takes":[144],"advantage":[145],"disk":[147],"parallelism.":[148],"compare":[150],"with":[152],"production":[155],"columnar":[156],"using":[158],"real":[159],"synthetic":[161],"datasets.":[162]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
