{"id":"https://openalex.org/W2913537284","doi":"https://doi.org/10.1109/bigdata.2018.8622527","title":"Column Cache: Buffer Cache for Columnar Storage on HDFS","display_name":"Column Cache: Buffer Cache for Columnar Storage on HDFS","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2913537284","doi":"https://doi.org/10.1109/bigdata.2018.8622527","mag":"2913537284"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2018.8622527","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622527","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101886411","display_name":"Takeshi Yoshimura","orcid":"https://orcid.org/0000-0002-7147-4225"},"institutions":[{"id":"https://openalex.org/I4210145865","display_name":"IBM Research - Tokyo","ror":"https://ror.org/04915qk43","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145865"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takeshi Yoshimura","raw_affiliation_strings":["IBM Research \u2013 Tokyo, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"IBM Research \u2013 Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I4210145865"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041846557","display_name":"Tatsuhiro Chiba","orcid":null},"institutions":[{"id":"https://openalex.org/I4210145865","display_name":"IBM Research - Tokyo","ror":"https://ror.org/04915qk43","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145865"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tatsuhiro Chiba","raw_affiliation_strings":["IBM Research \u2013 Tokyo, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"IBM Research \u2013 Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I4210145865"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082446517","display_name":"Hiroshi Horii","orcid":null},"institutions":[{"id":"https://openalex.org/I4210145865","display_name":"IBM Research - Tokyo","ror":"https://ror.org/04915qk43","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145865"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hiroshi Horii","raw_affiliation_strings":["IBM Research \u2013 Tokyo, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"IBM Research \u2013 Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I4210145865"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101886411"],"corresponding_institution_ids":["https://openalex.org/I4210145865"],"apc_list":null,"apc_paid":null,"fwci":0.1845,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.58663935,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"282","last_page":"291"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8420634865760803},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.7577255964279175},{"id":"https://openalex.org/keywords/page-cache","display_name":"Page cache","score":0.7511632442474365},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6510193347930908},{"id":"https://openalex.org/keywords/disk-buffer","display_name":"Disk buffer","score":0.6164056062698364},{"id":"https://openalex.org/keywords/cache-pollution","display_name":"Cache pollution","score":0.5594141483306885},{"id":"https://openalex.org/keywords/garbage-collection","display_name":"Garbage collection","score":0.550342321395874},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.45247119665145874},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.4225325584411621},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3696752190589905},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3467884063720703},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.2915703058242798},{"id":"https://openalex.org/keywords/garbage","display_name":"Garbage","score":0.1385902762413025}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8420634865760803},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.7577255964279175},{"id":"https://openalex.org/C36340418","wikidata":"https://www.wikidata.org/wiki/Q7124288","display_name":"Page cache","level":5,"score":0.7511632442474365},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6510193347930908},{"id":"https://openalex.org/C202623185","wikidata":"https://www.wikidata.org/wiki/Q375176","display_name":"Disk buffer","level":3,"score":0.6164056062698364},{"id":"https://openalex.org/C113166858","wikidata":"https://www.wikidata.org/wiki/Q5015981","display_name":"Cache pollution","level":5,"score":0.5594141483306885},{"id":"https://openalex.org/C105122174","wikidata":"https://www.wikidata.org/wiki/Q322202","display_name":"Garbage collection","level":3,"score":0.550342321395874},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.45247119665145874},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.4225325584411621},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3696752190589905},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3467884063720703},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.2915703058242798},{"id":"https://openalex.org/C75403996","wikidata":"https://www.wikidata.org/wiki/Q5521979","display_name":"Garbage","level":2,"score":0.1385902762413025},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2018.8622527","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622527","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W135304321","https://openalex.org/W1419077817","https://openalex.org/W1567228350","https://openalex.org/W1579147384","https://openalex.org/W1746258828","https://openalex.org/W1825603816","https://openalex.org/W2008633193","https://openalex.org/W2039189705","https://openalex.org/W2074935284","https://openalex.org/W2119738171","https://openalex.org/W2131975293","https://openalex.org/W2137935742","https://openalex.org/W2167510195","https://openalex.org/W2172220707","https://openalex.org/W2250017364","https://openalex.org/W2265366104","https://openalex.org/W2756536429","https://openalex.org/W3138367763","https://openalex.org/W4232094446","https://openalex.org/W6605464934","https://openalex.org/W6628428705","https://openalex.org/W6634769544","https://openalex.org/W6637806892","https://openalex.org/W6638231387","https://openalex.org/W6638533821","https://openalex.org/W6679815717","https://openalex.org/W6691453363"],"related_works":["https://openalex.org/W2535115842","https://openalex.org/W2116323004","https://openalex.org/W2118932116","https://openalex.org/W2133489088","https://openalex.org/W2298260853","https://openalex.org/W2741504934","https://openalex.org/W2539712666","https://openalex.org/W2396934146","https://openalex.org/W2020176098","https://openalex.org/W2111606042"],"abstract_inverted_index":{"Columnar":[0],"storage":[1,18,101],"is":[2,19],"a":[3,58,129,154,182],"data":[4,7,30,60],"source":[5],"for":[6,49,117,133],"analytics":[8,61],"in":[9,54,78,145,159,181],"distributed":[10,25],"computing":[11],"frameworks.":[12],"For":[13],"portability":[14],"and":[15,36,75,89,93,114,122,136,176],"scalability,":[16],"columnar":[17,29,47,100],"built":[20],"on":[21],"top":[22],"of":[23,95,157],"existing":[24],"file":[26,112],"systems":[27],"with":[28,68,149],"representations":[31,40],"such":[32,110],"as":[33,111],"Parquet,":[34],"RCFile,":[35],"ORC.":[37],"However,":[38],"these":[39],"fail":[41],"to":[42,102],"utilize":[43],"high-level":[44,108],"information":[45,109],"(e.g.,":[46],"formats)":[48],"low-level":[50],"disk":[51,73,120,172],"buffer":[52,82],"management":[53],"operating":[55,80,103],"systems.":[56,104],"As":[57],"result,":[59],"workloads":[62,161],"suffer":[63],"from":[64,99],"redundant":[65,142],"memory":[66],"buffers":[67,92],"expensive":[69],"garbage":[70,178],"collections,":[71],"unnecessary":[72],"readahead,":[74],"cache":[76,106,123,131,166],"pollution":[77],"the":[79,91],"system":[81],"cache.We":[83],"propose":[84],"column":[85,130],"cache,":[86],"which":[87],"unifies":[88],"re-structures":[90],"caches":[94],"multiple":[96],"software":[97],"layers":[98],"Column":[105],"leverages":[107],"formats":[113],"query":[115],"plans":[116],"enabling":[118],"adaptive":[119],"reads":[121,173],"eviction":[124],"policies.":[125],"We":[126],"have":[127],"developed":[128],"prototype":[132,140],"Apache":[134,146],"Parquet":[135],"observed":[137],"that":[138],"our":[139,150],"reduced":[141],"resource":[143],"utilization":[144],"Spark.":[147],"Specifically,":[148],"prototype,":[151],"Spark":[152],"showed":[153],"maximum":[155],"speedup":[156],"1.28x":[158],"TPC-DS":[160],"while":[162],"increasing":[163],"Linux":[164],"page":[165],"size":[167],"by":[168,174,186],"18%,":[169],"reducing":[170,177],"total":[171],"43%,":[175],"collection":[179],"time":[180],"Java":[183],"virtual":[184],"machine":[185],"76%.":[187]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
