{"id":"https://openalex.org/W2055181641","doi":"https://doi.org/10.1109/msst.2014.6855542","title":"The case for sampling on very large file systems","display_name":"The case for sampling on very large file systems","publication_year":2014,"publication_date":"2014-06-01","ids":{"openalex":"https://openalex.org/W2055181641","doi":"https://doi.org/10.1109/msst.2014.6855542","mag":"2055181641"},"language":"en","primary_location":{"id":"doi:10.1109/msst.2014.6855542","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msst.2014.6855542","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 30th Symposium on Mass Storage Systems and Technologies (MSST)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086245700","display_name":"George Goldberg","orcid":null},"institutions":[{"id":"https://openalex.org/I4210167297","display_name":"IBM Research - Haifa","ror":"https://ror.org/05rw9t746","country_code":"IL","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210167297"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"George Goldberg","raw_affiliation_strings":["IBM Haifa Research Labs, Haifa, Haifa, IL","IBM Research Haifa, Israel"],"affiliations":[{"raw_affiliation_string":"IBM Haifa Research Labs, Haifa, Haifa, IL","institution_ids":[]},{"raw_affiliation_string":"IBM Research Haifa, Israel","institution_ids":["https://openalex.org/I4210167297"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036936298","display_name":"Danny Harnik","orcid":"https://orcid.org/0009-0000-0614-6543"},"institutions":[{"id":"https://openalex.org/I4210167297","display_name":"IBM Research - Haifa","ror":"https://ror.org/05rw9t746","country_code":"IL","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210167297"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Danny Harnik","raw_affiliation_strings":["IBM Haifa Research Labs, Haifa, Haifa, IL","IBM Research Haifa, Israel"],"affiliations":[{"raw_affiliation_string":"IBM Haifa Research Labs, Haifa, Haifa, IL","institution_ids":[]},{"raw_affiliation_string":"IBM Research Haifa, Israel","institution_ids":["https://openalex.org/I4210167297"]}]},{"author_position":"last","author":{"id":null,"display_name":"Dmitry Sotnikov","orcid":null},"institutions":[{"id":"https://openalex.org/I4210167297","display_name":"IBM Research - Haifa","ror":"https://ror.org/05rw9t746","country_code":"IL","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210167297"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Dmitry Sotnikov","raw_affiliation_strings":["IBM Haifa Research Labs, Haifa, Haifa, IL","IBM Research Haifa, Israel"],"affiliations":[{"raw_affiliation_string":"IBM Haifa Research Labs, Haifa, Haifa, IL","institution_ids":[]},{"raw_affiliation_string":"IBM Research Haifa, Israel","institution_ids":["https://openalex.org/I4210167297"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5086245700"],"corresponding_institution_ids":["https://openalex.org/I4210167297"],"apc_list":null,"apc_paid":null,"fwci":0.7195,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.75774988,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10742","display_name":"Peer-to-Peer Network Technologies","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8340134620666504},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.690514862537384},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.5113124847412109},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.46119049191474915},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4485185146331787},{"id":"https://openalex.org/keywords/file-system","display_name":"File system","score":0.44725751876831055},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4167143702507019},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.36616289615631104},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.1649150550365448},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.13224411010742188}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8340134620666504},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.690514862537384},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.5113124847412109},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.46119049191474915},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4485185146331787},{"id":"https://openalex.org/C2780940931","wikidata":"https://www.wikidata.org/wiki/Q174989","display_name":"File system","level":2,"score":0.44725751876831055},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4167143702507019},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.36616289615631104},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.1649150550365448},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.13224411010742188},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/msst.2014.6855542","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msst.2014.6855542","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 30th Symposium on Mass Storage Systems and Technologies (MSST)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W148271226","https://openalex.org/W598218960","https://openalex.org/W1601435884","https://openalex.org/W1662601665","https://openalex.org/W1992023276","https://openalex.org/W2018666252","https://openalex.org/W2048937748","https://openalex.org/W2050301160","https://openalex.org/W2058991275","https://openalex.org/W2059015573","https://openalex.org/W2084333665","https://openalex.org/W2103012681","https://openalex.org/W2103109046","https://openalex.org/W2118229812","https://openalex.org/W2119885577","https://openalex.org/W2143730413","https://openalex.org/W2151065878","https://openalex.org/W2161463763","https://openalex.org/W2163336450","https://openalex.org/W2168540086","https://openalex.org/W2170432965","https://openalex.org/W3139262251","https://openalex.org/W4226478513","https://openalex.org/W4233413206","https://openalex.org/W4234319094","https://openalex.org/W4301133738","https://openalex.org/W6606083523","https://openalex.org/W6636065709","https://openalex.org/W6648171237","https://openalex.org/W6663050676","https://openalex.org/W6684798095","https://openalex.org/W6684841828"],"related_works":["https://openalex.org/W2429057255","https://openalex.org/W2187546663","https://openalex.org/W148745890","https://openalex.org/W4389670110","https://openalex.org/W2611942503","https://openalex.org/W4315621326","https://openalex.org/W2899790217","https://openalex.org/W2598865957","https://openalex.org/W1576092969","https://openalex.org/W2524154428"],"abstract_inverted_index":{"Sampling":[0],"has":[1,38],"long":[2],"been":[3,40],"a":[4],"prominent":[5],"tool":[6],"in":[7,34,50,79,88,120,126],"statistics":[8,154],"and":[9,12,42,98,110,115,148,150],"analytics,":[10],"first":[11],"foremost":[13],"when":[14],"very":[15,26,77,80,156],"large":[16,27,81,131,157],"amounts":[17],"of":[18,25,144,153],"data":[19,32,158],"are":[20,71],"involved.":[21],"In":[22,64,137],"the":[23,59,92,117],"realm":[24],"file":[28,82,132],"systems":[29,133],"(and":[30],"hierarchical":[31],"stores":[33],"general),":[35],"however,":[36],"sampling":[37,49,61,75,104,129],"mostly":[39],"ignored":[41],"for":[43,73],"several":[44,124],"good":[45],"reasons.":[46],"Mainly,":[47],"running":[48],"such":[51],"an":[52],"environment":[53],"introduces":[54],"technical":[55,93],"challenges":[56],"that":[57,69,105],"make":[58],"entire":[60],"process":[62],"non-beneficial.":[63],"this":[65,86],"work":[66],"we":[67,96,122,139],"demonstrate":[68,123],"there":[70],"cases":[72],"which":[74,121,127],"is":[76,106,134],"worthwhile":[78],"systems.":[83],"We":[84],"address":[85],"topic":[87],"two":[89],"aspect:":[90],"(a)":[91],"side":[94],"where":[95],"design":[97],"implement":[99],"solutions":[100],"to":[101],"efficient":[102],"weighted":[103,128],"also":[107],"distributed,":[108],"one-pass":[109],"addresses":[111],"multiple":[112],"efficiency":[113],"aspects;":[114],"(b)":[116],"usability":[118],"aspect":[119],"use-cases":[125,141],"over":[130],"extremely":[135],"beneficial.":[136],"particular,":[138],"show":[140],"regarding":[142],"estimation":[143],"compression":[145],"ratios,":[146],"testing":[147],"auditing":[149],"offline":[151],"collection":[152],"on":[155],"stores.":[159]},"counts_by_year":[{"year":2015,"cited_by_count":2}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
