{"id":"https://openalex.org/W1987613758","doi":"https://doi.org/10.1109/apcsac.2008.4625441","title":"Semantic Data De-duplication for archival storage systems","display_name":"Semantic Data De-duplication for archival storage systems","publication_year":2008,"publication_date":"2008-08-01","ids":{"openalex":"https://openalex.org/W1987613758","doi":"https://doi.org/10.1109/apcsac.2008.4625441","mag":"1987613758"},"language":"en","primary_location":{"id":"doi:10.1109/apcsac.2008.4625441","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apcsac.2008.4625441","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 13th Asia-Pacific Computer Systems Architecture Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103171964","display_name":"Chuanyi Liu","orcid":"https://orcid.org/0000-0002-9846-9709"},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Chuanyi Liu","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Minnesota, Minneapolis, MN, USA","Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Minnesota, Minneapolis, MN, USA","institution_ids":["https://openalex.org/I130238516"]},{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034735107","display_name":"Dapeng Ju","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dapeng Ju","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100352646","display_name":"Yu Gu","orcid":"https://orcid.org/0000-0001-6939-0850"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Gu","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016596981","display_name":"Youhui Zhang","orcid":"https://orcid.org/0000-0003-2333-3580"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Youhui Zhang","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100450575","display_name":"Dongsheng Wang","orcid":"https://orcid.org/0000-0001-5779-9026"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongsheng Wang","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004446054","display_name":"David H. C. Du","orcid":"https://orcid.org/0009-0000-6244-1336"},"institutions":[{"id":"https://openalex.org/I130238516","display_name":"University of Minnesota","ror":"https://ror.org/017zqws13","country_code":"US","type":"education","lineage":["https://openalex.org/I130238516"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David H.C. Du","raw_affiliation_strings":["Department of Computer Science and Engineering, University of Minnesota, Minneapolis, MN, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of Minnesota, Minneapolis, MN, USA","institution_ids":["https://openalex.org/I130238516"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1441,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.80584064,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8514436483383179},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.626950740814209},{"id":"https://openalex.org/keywords/file-system","display_name":"File system","score":0.574234127998352},{"id":"https://openalex.org/keywords/computer-data-storage","display_name":"Computer data storage","score":0.46014508605003357},{"id":"https://openalex.org/keywords/storage-management","display_name":"Storage management","score":0.45646679401397705},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.45523929595947266},{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.438009649515152},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.23678329586982727}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8514436483383179},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.626950740814209},{"id":"https://openalex.org/C2780940931","wikidata":"https://www.wikidata.org/wiki/Q174989","display_name":"File system","level":2,"score":0.574234127998352},{"id":"https://openalex.org/C194739806","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Computer data storage","level":2,"score":0.46014508605003357},{"id":"https://openalex.org/C2984984529","wikidata":"https://www.wikidata.org/wiki/Q7619925","display_name":"Storage management","level":2,"score":0.45646679401397705},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.45523929595947266},{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.438009649515152},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.23678329586982727}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apcsac.2008.4625441","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apcsac.2008.4625441","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 13th Asia-Pacific Computer Systems Architecture Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W99339045","https://openalex.org/W200233886","https://openalex.org/W1516720694","https://openalex.org/W1542686980","https://openalex.org/W1605161111","https://openalex.org/W1831797694","https://openalex.org/W1999984505","https://openalex.org/W2056980397","https://openalex.org/W2066636486","https://openalex.org/W2096184452","https://openalex.org/W2108183412","https://openalex.org/W2119565742","https://openalex.org/W2140733653","https://openalex.org/W2141641405","https://openalex.org/W2157737097","https://openalex.org/W2167777830","https://openalex.org/W2182775338","https://openalex.org/W2205436351","https://openalex.org/W2802001362","https://openalex.org/W4239670432","https://openalex.org/W6604069196","https://openalex.org/W6608138927","https://openalex.org/W6680744948","https://openalex.org/W6683386797","https://openalex.org/W6684653763"],"related_works":["https://openalex.org/W3008013744","https://openalex.org/W2884042376","https://openalex.org/W2615141153","https://openalex.org/W2077135008","https://openalex.org/W223870977","https://openalex.org/W2739791894","https://openalex.org/W4394989021","https://openalex.org/W2340838089","https://openalex.org/W2937216651","https://openalex.org/W4236226663"],"abstract_inverted_index":{"In":[0,75],"archival":[1,108],"storage":[2,164,183,187,214],"systems,":[3],"there":[4],"is":[5,52,82,126,157,166],"a":[6,114,143,149],"huge":[7],"amount":[8],"of":[9,50,87,106,124,145,152],"duplicate":[10,56],"data":[11,46,57,79,163,202],"or":[12],"redundant":[13],"data,":[14],"which":[15,51,84,156,179],"occupy":[16],"significant":[17],"extra":[18,37],"equipments":[19],"and":[20,33,35,70,103,147,169,229],"power":[21],"consumptions,":[22],"largely":[23,230],"lowering":[24],"down":[25],"resources":[26],"utilization":[27],"(such":[28,95],"as":[29,41,96,190,197,199],"the":[30,42,48,55,59,88,92,107,112,121,130,182,186,194,201,213,232],"network":[31],"bandwidth":[32],"storage)":[34],"imposing":[36],"burden":[38],"on":[39],"management":[40],"scale":[43],"increases.":[44],"So":[45,160],"de-duplication,":[47],"goal":[49,123],"to":[53,110,127,191,222,226],"minimize":[54],"in":[58,68,72,91,142,185,237],"inter-file":[60,131],"level,":[61],"has":[62],"been":[63],"receiving":[64],"broad":[65],"attention":[66],"both":[67],"academic":[69],"industry":[71],"recent":[73],"years.":[74],"this":[76],"paper,":[77],"semantic":[78,89,117],"de-duplication":[80],"(SDD)":[81],"proposed,":[83],"makes":[85],"use":[86],"information":[90],"I/O":[93,195],"path":[94],"file":[97,99,115],"type,":[98],"format,":[100],"application":[101],"hints":[102],"filesystem":[104],"metadata)":[105],"files":[109],"direct":[111],"dividing":[113],"into":[116,138,175],"chunks":[118],"(SC).":[119],"While":[120],"main":[122],"SDD":[125,209],"maximally":[128],"reduce":[129,212],"level":[132],"duplications,":[133],"directly":[134],"storing":[135],"variable":[136],"SCes":[137,171],"disks":[139],"will":[140],"result":[141],"lot":[144],"fragments":[146],"involve":[148],"high":[150],"percentage":[151],"random":[153],"disk":[154],"accesses,":[155],"very":[158],"inefficient.":[159],"an":[161],"efficient":[162],"scheme":[165],"also":[167],"designed":[168],"implemented:":[170],"are":[172,180],"further":[173,211],"packaged":[174],"fixed":[176],"sized":[177],"Objects,":[178],"actually":[181],"units":[184],"devices,":[188],"so":[189],"speed":[192],"up":[193],"performance":[196,234],"well":[198],"ease":[200],"management.":[203],"Primary":[204],"experiments":[205],"have":[206],"demonstrated":[207],"that":[208],"can":[210],"space":[215],"compared":[216],"with":[217],"current":[218],"methods":[219],"(from":[220],"20%":[221],"near":[223],"50%":[224],"according":[225],"different":[227],"datasets),":[228],"improves":[231],"writing":[233],"(about":[235],"50%-70%":[236],"average).":[238]},"counts_by_year":[{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
