{"id":"https://openalex.org/W2408297543","doi":"https://doi.org/10.1145/2908131.2908165","title":"Can we find documents in web archives without knowing their contents?","display_name":"Can we find documents in web archives without knowing their contents?","publication_year":2016,"publication_date":"2016-05-18","ids":{"openalex":"https://openalex.org/W2408297543","doi":"https://doi.org/10.1145/2908131.2908165","mag":"2408297543"},"language":"en","primary_location":{"id":"doi:10.1145/2908131.2908165","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2908131.2908165","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th ACM Conference on Web Science","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1701.03942","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Khoi Duy Vo","orcid":null},"institutions":[{"id":"https://openalex.org/I114112103","display_name":"Leibniz University Hannover","ror":"https://ror.org/0304hq317","country_code":"DE","type":"education","lineage":["https://openalex.org/I114112103"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Khoi Duy Vo","raw_affiliation_strings":["Leibniz Universit\u00e4t Hannover, Germany"],"affiliations":[{"raw_affiliation_string":"Leibniz Universit\u00e4t Hannover, Germany","institution_ids":["https://openalex.org/I114112103"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tuan Tran","orcid":null},"institutions":[{"id":"https://openalex.org/I114112103","display_name":"Leibniz University Hannover","ror":"https://ror.org/0304hq317","country_code":"DE","type":"education","lineage":["https://openalex.org/I114112103"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tuan Tran","raw_affiliation_strings":["Leibniz Universit\u00e4t Hannover, Germany"],"affiliations":[{"raw_affiliation_string":"Leibniz Universit\u00e4t Hannover, Germany","institution_ids":["https://openalex.org/I114112103"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tu Ngoc Nguyen","orcid":null},"institutions":[{"id":"https://openalex.org/I114112103","display_name":"Leibniz University Hannover","ror":"https://ror.org/0304hq317","country_code":"DE","type":"education","lineage":["https://openalex.org/I114112103"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tu Ngoc Nguyen","raw_affiliation_strings":["Leibniz Universit\u00e4t Hannover, Germany"],"affiliations":[{"raw_affiliation_string":"Leibniz Universit\u00e4t Hannover, Germany","institution_ids":["https://openalex.org/I114112103"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xiaofei Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I114112103","display_name":"Leibniz University Hannover","ror":"https://ror.org/0304hq317","country_code":"DE","type":"education","lineage":["https://openalex.org/I114112103"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Xiaofei Zhu","raw_affiliation_strings":["Leibniz Universit\u00e4t Hannover, Germany"],"affiliations":[{"raw_affiliation_string":"Leibniz Universit\u00e4t Hannover, Germany","institution_ids":["https://openalex.org/I114112103"]}]},{"author_position":"last","author":{"id":null,"display_name":"Wolfgang Nejdl","orcid":null},"institutions":[{"id":"https://openalex.org/I114112103","display_name":"Leibniz University Hannover","ror":"https://ror.org/0304hq317","country_code":"DE","type":"education","lineage":["https://openalex.org/I114112103"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Wolfgang Nejdl","raw_affiliation_strings":["Leibniz Universit\u00e4t Hannover, Germany"],"affiliations":[{"raw_affiliation_string":"Leibniz Universit\u00e4t Hannover, Germany","institution_ids":["https://openalex.org/I114112103"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I114112103"],"apc_list":null,"apc_paid":null,"fwci":2.7723,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.91921374,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"173","last_page":"182"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9836999773979187,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.46389999985694885},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.42899999022483826},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.36739999055862427},{"id":"https://openalex.org/keywords/web-application","display_name":"Web application","score":0.36239999532699585},{"id":"https://openalex.org/keywords/data-web","display_name":"Data Web","score":0.32109999656677246},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.2800000011920929}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6991000175476074},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.6862999796867371},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.46389999985694885},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.42899999022483826},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3869999945163727},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.36739999055862427},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.36239999532699585},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3400999903678894},{"id":"https://openalex.org/C162005631","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Data Web","level":3,"score":0.32109999656677246},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.2736999988555908},{"id":"https://openalex.org/C2776543384","wikidata":"https://www.wikidata.org/wiki/Q593289","display_name":"Information access","level":2,"score":0.257999986410141},{"id":"https://openalex.org/C35578498","wikidata":"https://www.wikidata.org/wiki/Q193424","display_name":"Web service","level":2,"score":0.2549000084400177},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2908131.2908165","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2908131.2908165","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th ACM Conference on Web Science","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1701.03942","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1701.03942","pdf_url":"https://arxiv.org/pdf/1701.03942","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1701.03942","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1701.03942","pdf_url":"https://arxiv.org/pdf/1701.03942","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W90672828","https://openalex.org/W185754135","https://openalex.org/W1489893579","https://openalex.org/W1511283908","https://openalex.org/W1841349478","https://openalex.org/W1964338930","https://openalex.org/W1971772794","https://openalex.org/W1988020171","https://openalex.org/W1989468977","https://openalex.org/W1996092152","https://openalex.org/W2049718889","https://openalex.org/W2050872380","https://openalex.org/W2101944713","https://openalex.org/W2138621811","https://openalex.org/W2147057843","https://openalex.org/W2147872511","https://openalex.org/W2156893245","https://openalex.org/W2157748587","https://openalex.org/W2164052363","https://openalex.org/W2397492705","https://openalex.org/W2911964244","https://openalex.org/W6604189946"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"of":[2,11,25,57,67],"preservation":[3],"technologies":[4],"have":[5],"led":[6],"to":[7,21,49,63,79],"an":[8],"increasing":[9],"number":[10],"Web":[12,74,80],"archive":[13,75,81],"systems":[14],"and":[15,38,43,53],"collections.":[16],"These":[17],"collections":[18],"are":[19],"valuable":[20],"explore":[22],"the":[23,26,50,54,64],"past":[24],"Web,":[27],"but":[28],"their":[29],"value":[30],"can":[31],"only":[32],"be":[33,47],"uncovered":[34],"with":[35],"effective":[36],"access":[37,78],"exploration":[39],"mechanisms.":[40],"Ideal":[41],"search":[42],"ranking":[44],"methods":[45],"must":[46],"robust":[48],"high":[51],"redundancy":[52],"temporal":[55],"noise":[56],"contents,":[58],"as":[59,61],"well":[60],"scalable":[62],"huge":[65],"amount":[66],"data":[68],"archived.":[69],"Despite":[70],"several":[71],"attempts":[72],"in":[73],"search,":[76],"facilitating":[77],"still":[82],"remains":[83],"a":[84],"challenging":[85],"problem.":[86]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2016-06-24T00:00:00"}
