{"id":"https://openalex.org/W7124855673","doi":"https://doi.org/10.1109/cloudcom67567.2025.11331481","title":"SemanticPrefetcher: Accelerate Data Lake Access with Semantics-Aware File Prefetching","display_name":"SemanticPrefetcher: Accelerate Data Lake Access with Semantics-Aware File Prefetching","publication_year":2025,"publication_date":"2025-11-14","ids":{"openalex":"https://openalex.org/W7124855673","doi":"https://doi.org/10.1109/cloudcom67567.2025.11331481"},"language":null,"primary_location":{"id":"doi:10.1109/cloudcom67567.2025.11331481","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cloudcom67567.2025.11331481","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 lEEE International Conference on Cloud Computing Technology and Science (CloudCom)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091298714","display_name":"Tianze Wang","orcid":"https://orcid.org/0000-0001-8757-5998"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tianze Wang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036592508","display_name":"Guanjie Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanjie Wang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102760015","display_name":"Mu Yang","orcid":"https://orcid.org/0000-0001-9442-9243"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyan Yang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123449942","display_name":"Manqi Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Manqi Luo","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123415325","display_name":"Mingchuan Zou","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mingchuan Zou","raw_affiliation_strings":["University of Cambridge"],"affiliations":[{"raw_affiliation_string":"University of Cambridge","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100418548","display_name":"Chen Chen","orcid":"https://orcid.org/0000-0003-2104-534X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chen Chen","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5120913770","display_name":"Minyi Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minyi Guo","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5091298714"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.83704764,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.7581999897956848,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.7581999897956848,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.10450000315904617,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.03869999945163727,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6053000092506409},{"id":"https://openalex.org/keywords/data-access","display_name":"Data access","score":0.5097000002861023},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4885999858379364},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.4839000105857849},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4203999936580658},{"id":"https://openalex.org/keywords/data-stream-mining","display_name":"Data stream mining","score":0.4180000126361847}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8849999904632568},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6053000092506409},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.5097000002861023},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4885999858379364},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.4839000105857849},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4203999936580658},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.4180000126361847},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.40400001406669617},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.33649998903274536},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.32359999418258667},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.30390000343322754},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.30059999227523804},{"id":"https://openalex.org/C2780940931","wikidata":"https://www.wikidata.org/wiki/Q174989","display_name":"File system","level":2,"score":0.2888999879360199},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2662000060081482},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2524000108242035}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cloudcom67567.2025.11331481","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cloudcom67567.2025.11331481","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 lEEE International Conference on Cloud Computing Technology and Science (CloudCom)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W2096272167","https://openalex.org/W2124937058","https://openalex.org/W2620041922","https://openalex.org/W3176014708"],"related_works":[],"abstract_inverted_index":{"Storage-compute":[0],"disaggregation":[1],"has":[2],"become":[3,40],"a":[4,52,79],"mainstream":[5],"paradigm":[6],"in":[7,68,142,154],"cloud":[8],"computing,":[9],"yet":[10],"data":[11,156],"lake":[12],"workloads":[13],"introduce":[14],"distinct":[15],"prefetching":[16,38,55,153],"challenges:":[17],"massive":[18],"numbers":[19],"of":[20],"small":[21],"files,":[22],"interleaved":[23],"multi-tenant":[24,90],"streams,":[25],"and":[26,36,46,92,104,127,151],"frequent":[27],"one-time":[28],"accesses.":[29,99],"Under":[30],"these":[31],"conditions,":[32],"traditional":[33],"sequential,":[34],"correlation-based,":[35],"semantic":[37,81],"methods":[39],"ineffective,":[41],"leading":[42],"to":[43,96,125,133],"cache":[44],"inefficiency":[45],"high":[47],"latency.":[48],"We":[49],"propose":[50],"SemanticPrefetcher,":[51],"lightweight,":[53],"semantic-aware":[54],"mechanism":[56],"that":[57,139],"incrementally":[58],"constructs":[59],"meaningful":[60],"access":[61,110],"streams":[62,88],"at":[63],"runtime.":[64],"The":[65],"system":[66],"operates":[67],"three":[69],"stages:":[70],"it":[71],"tokenizes":[72],"file":[73,143],"paths":[74,144],"or":[75],"object":[76],"names":[77],"into":[78,86,108],"unified":[80],"representation,":[82],"clusters":[83],"related":[84],"requests":[85,107],"coherent":[87],"despite":[89],"interleaving,":[91],"detects":[93],"naming":[94],"regularities":[95],"predict":[97],"future":[98],"This":[100],"design":[101],"transforms":[102],"mixed":[103],"seemingly":[105],"disordered":[106],"predictable":[109],"flows":[111],"without":[112],"application":[113],"modifications.":[114],"Implemented":[115],"on":[116],"JuiceFS,":[117],"SemanticPrefetcher":[118],"reduces":[119],"end-to-end":[120],"execution":[121],"time":[122],"by":[123,130],"up":[124],"39.6%":[126],"read":[128],"latency":[129],"79.3%":[131],"compared":[132],"state-of-the-art":[134],"baselines.":[135],"These":[136],"results":[137],"demonstrate":[138],"implicit":[140],"semantics":[141],"can":[145],"be":[146],"effectively":[147],"leveraged":[148],"for":[149],"robust":[150],"efficient":[152],"cloud-scale":[155],"lakes.":[157]},"counts_by_year":[],"updated_date":"2026-01-22T23:29:09.771500","created_date":"2026-01-21T00:00:00"}
