{"id":"https://openalex.org/W2146698939","doi":"https://doi.org/10.1145/2421648.2421652","title":"Designing a fast file system crawler with incremental differencing","display_name":"Designing a fast file system crawler with incremental differencing","publication_year":2012,"publication_date":"2012-12-18","ids":{"openalex":"https://openalex.org/W2146698939","doi":"https://doi.org/10.1145/2421648.2421652","mag":"2146698939"},"language":"en","primary_location":{"id":"doi:10.1145/2421648.2421652","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2421648.2421652","pdf_url":null,"source":{"id":"https://openalex.org/S50071195","display_name":"ACM SIGOPS Operating Systems Review","issn_l":"0163-5980","issn":["0163-5980","1943-586X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGOPS Operating Systems Review","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026480522","display_name":"Tim Bisson","orcid":null},"institutions":[{"id":"https://openalex.org/I1283576576","display_name":"NetApp (United States)","ror":"https://ror.org/05c4cm338","country_code":"US","type":"company","lineage":["https://openalex.org/I1283576576"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tim Bisson","raw_affiliation_strings":["NetApp Inc"],"affiliations":[{"raw_affiliation_string":"NetApp Inc","institution_ids":["https://openalex.org/I1283576576"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017661088","display_name":"Yuvraj Patel","orcid":null},"institutions":[{"id":"https://openalex.org/I1283576576","display_name":"NetApp (United States)","ror":"https://ror.org/05c4cm338","country_code":"US","type":"company","lineage":["https://openalex.org/I1283576576"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuvraj Patel","raw_affiliation_strings":["NetApp Inc"],"affiliations":[{"raw_affiliation_string":"NetApp Inc","institution_ids":["https://openalex.org/I1283576576"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033478777","display_name":"Shankar Pasupathy","orcid":null},"institutions":[{"id":"https://openalex.org/I1283576576","display_name":"NetApp (United States)","ror":"https://ror.org/05c4cm338","country_code":"US","type":"company","lineage":["https://openalex.org/I1283576576"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shankar Pasupathy","raw_affiliation_strings":["NetApp Inc"],"affiliations":[{"raw_affiliation_string":"NetApp Inc","institution_ids":["https://openalex.org/I1283576576"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5026480522"],"corresponding_institution_ids":["https://openalex.org/I1283576576"],"apc_list":null,"apc_paid":null,"fwci":0.7105,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.82376811,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"46","issue":"3","first_page":"11","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8920906782150269},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.7521710395812988},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.694168210029602},{"id":"https://openalex.org/keywords/file-system","display_name":"File system","score":0.6311834454536438},{"id":"https://openalex.org/keywords/unix-file-types","display_name":"Unix file types","score":0.6155438423156738},{"id":"https://openalex.org/keywords/fork","display_name":"Fork (system call)","score":0.5193118453025818},{"id":"https://openalex.org/keywords/posix","display_name":"POSIX","score":0.4972210228443146},{"id":"https://openalex.org/keywords/file-control-block","display_name":"File Control Block","score":0.48138168454170227},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.471788614988327},{"id":"https://openalex.org/keywords/computer-file","display_name":"Computer file","score":0.44404393434524536},{"id":"https://openalex.org/keywords/versioning-file-system","display_name":"Versioning file system","score":0.44050851464271545},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.331398606300354},{"id":"https://openalex.org/keywords/journaling-file-system","display_name":"Journaling file system","score":0.3283544182777405},{"id":"https://openalex.org/keywords/stub-file","display_name":"Stub file","score":0.25751426815986633}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8920906782150269},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.7521710395812988},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.694168210029602},{"id":"https://openalex.org/C2780940931","wikidata":"https://www.wikidata.org/wiki/Q174989","display_name":"File system","level":2,"score":0.6311834454536438},{"id":"https://openalex.org/C21729314","wikidata":"https://www.wikidata.org/wiki/Q7896858","display_name":"Unix file types","level":4,"score":0.6155438423156738},{"id":"https://openalex.org/C40523978","wikidata":"https://www.wikidata.org/wiki/Q14675","display_name":"Fork (system call)","level":2,"score":0.5193118453025818},{"id":"https://openalex.org/C532756234","wikidata":"https://www.wikidata.org/wiki/Q14658","display_name":"POSIX","level":2,"score":0.4972210228443146},{"id":"https://openalex.org/C180500224","wikidata":"https://www.wikidata.org/wiki/Q1412592","display_name":"File Control Block","level":4,"score":0.48138168454170227},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.471788614988327},{"id":"https://openalex.org/C95637964","wikidata":"https://www.wikidata.org/wiki/Q82753","display_name":"Computer file","level":2,"score":0.44404393434524536},{"id":"https://openalex.org/C166807848","wikidata":"https://www.wikidata.org/wiki/Q3445065","display_name":"Versioning file system","level":4,"score":0.44050851464271545},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.331398606300354},{"id":"https://openalex.org/C2225880","wikidata":"https://www.wikidata.org/wiki/Q579047","display_name":"Journaling file system","level":3,"score":0.3283544182777405},{"id":"https://openalex.org/C13674803","wikidata":"https://www.wikidata.org/wiki/Q7627301","display_name":"Stub file","level":3,"score":0.25751426815986633}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2421648.2421652","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2421648.2421652","pdf_url":null,"source":{"id":"https://openalex.org/S50071195","display_name":"ACM SIGOPS Operating Systems Review","issn_l":"0163-5980","issn":["0163-5980","1943-586X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGOPS Operating Systems Review","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320307764","display_name":"Microsoft","ror":"https://ror.org/00d0nc645"},{"id":"https://openalex.org/F4320316505","display_name":"NetApp","ror":"https://ror.org/05c4cm338"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W143620156","https://openalex.org/W1613836731","https://openalex.org/W1751370186","https://openalex.org/W2007687650","https://openalex.org/W2029341294","https://openalex.org/W2127901684","https://openalex.org/W2141928865","https://openalex.org/W2164542999","https://openalex.org/W2295141584"],"related_works":["https://openalex.org/W4313226861","https://openalex.org/W2058294301","https://openalex.org/W2600623072","https://openalex.org/W1481669841","https://openalex.org/W1571833289","https://openalex.org/W2154161369","https://openalex.org/W2129512728","https://openalex.org/W2203797969","https://openalex.org/W2482046578","https://openalex.org/W2369276881"],"abstract_inverted_index":{"Search":[0],"engines":[1],"for":[2],"storage":[3],"systems":[4],"rely":[5],"on":[6,45,55],"crawlers":[7,125],"to":[8,16,107,118,134],"gather":[9],"the":[10,27,64,75,82,85,94,108,128,132,145],"list":[11,32,137],"of":[12,21,43,77,96,138],"files":[13,121],"that":[14,100],"need":[15],"be":[17,34],"indexed.":[18],"The":[19,103],"recency":[20],"an":[22],"index":[23],"is":[24,51],"determined":[25],"by":[26],"speed":[28],"at":[29],"which":[30],"this":[31,60],"can":[33],"gathered.":[35],"While":[36],"there":[37,50],"has":[38],"been":[39],"a":[40,68,113,136,141],"substantial":[41],"amount":[42,95],"literature":[44,54],"building":[46,67],"efficient":[47],"web":[48],"crawlers,":[49],"very":[52],"little":[53],"file":[56,69,79,88,109],"system":[57,70,80,89],"crawlers.":[58],"In":[59],"paper":[61],"we":[62,126],"discuss":[63],"challenges":[65],"in":[66,148],"crawler.":[71],"We":[72],"then":[73],"present":[74],"design":[76],"two":[78],"crawlers:":[81],"first":[83],"uses":[84],"standard":[86],"POSIX":[87],"API":[90,115],"but":[91],"carefully":[92],"controls":[93],"memory":[97],"and":[98,112,144],"CPU":[99],"it":[101],"uses.":[102],"second":[104],"leverages":[105],"modifications":[106],"systems's":[110],"internals,":[111],"new":[114],"called":[116],"SnapDiff,":[117],"detect":[119],"modified":[120],"rapidly.":[122],"For":[123],"both":[124],"describe":[127],"incremental":[129],"differencing":[130],"design;":[131],"method":[133],"produce":[135],"changes":[139],"between":[140],"previous":[142],"crawl":[143],"current":[146],"point":[147],"time.":[149]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
