{"id":"https://openalex.org/W2964718101","doi":"https://doi.org/10.1145/3383583.3398533","title":"Making Recommendations from Web Archives for \"Lost\" Web Pages","display_name":"Making Recommendations from Web Archives for \"Lost\" Web Pages","publication_year":2020,"publication_date":"2020-08-01","ids":{"openalex":"https://openalex.org/W2964718101","doi":"https://doi.org/10.1145/3383583.3398533","mag":"2964718101"},"language":"en","primary_location":{"id":"doi:10.1145/3383583.3398533","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3383583.3398533","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM/IEEE Joint Conference on Digital Libraries in 2020","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1908.02819","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009372635","display_name":"Lulwah M. Alkwai","orcid":"https://orcid.org/0000-0002-6424-961X"},"institutions":[{"id":"https://openalex.org/I4210088963","display_name":"University of Ha'il","ror":"https://ror.org/013w98a82","country_code":"SA","type":"education","lineage":["https://openalex.org/I4210088963"]}],"countries":["SA"],"is_corresponding":true,"raw_author_name":"Lulwah M. Alkwai","raw_affiliation_strings":["University of Hail, Hail, Saudi Arabia"],"affiliations":[{"raw_affiliation_string":"University of Hail, Hail, Saudi Arabia","institution_ids":["https://openalex.org/I4210088963"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081811192","display_name":"Michael L. Nelson","orcid":"https://orcid.org/0000-0003-3749-8116"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael L. Nelson","raw_affiliation_strings":["Old Dominion University, Norfolk, VA, USA"],"affiliations":[{"raw_affiliation_string":"Old Dominion University, Norfolk, VA, USA","institution_ids":["https://openalex.org/I81365321"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085719625","display_name":"Michele C. Weigle","orcid":"https://orcid.org/0000-0002-2787-7166"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michele C. Weigle","raw_affiliation_strings":["Old Dominion University, Norfolk, VA, USA"],"affiliations":[{"raw_affiliation_string":"Old Dominion University, Norfolk, VA, USA","institution_ids":["https://openalex.org/I81365321"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5009372635"],"corresponding_institution_ids":["https://openalex.org/I4210088963"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00592947,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"87","last_page":"96"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9824000000953674,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.7844938635826111},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.7257015705108643},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.718555212020874},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.6458605527877808},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6035948395729065},{"id":"https://openalex.org/keywords/web-search-engine","display_name":"Web search engine","score":0.4544452130794525},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.41102293133735657},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.26127535104751587}],"concepts":[{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.7844938635826111},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.7257015705108643},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.718555212020874},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.6458605527877808},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6035948395729065},{"id":"https://openalex.org/C521815418","wikidata":"https://www.wikidata.org/wiki/Q4182287","display_name":"Web search engine","level":4,"score":0.4544452130794525},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.41102293133735657},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.26127535104751587}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3383583.3398533","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3383583.3398533","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM/IEEE Joint Conference on Digital Libraries in 2020","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1908.02819","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1908.02819","pdf_url":"https://arxiv.org/pdf/1908.02819","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2964718101","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1908.02819","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1908.02819","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1908.02819","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1908.02819","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1908.02819","pdf_url":"https://arxiv.org/pdf/1908.02819","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6499999761581421}],"awards":[{"id":"https://openalex.org/G4610856633","display_name":"III: Small: Increasing the Value of Existing Web Archives","funder_award_id":"1526700","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2964718101.pdf","grobid_xml":"https://content.openalex.org/works/W2964718101.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W14035677","https://openalex.org/W257652643","https://openalex.org/W1581437579","https://openalex.org/W1971772794","https://openalex.org/W2003471189","https://openalex.org/W2008781077","https://openalex.org/W2053458824","https://openalex.org/W2067698488","https://openalex.org/W2073830447","https://openalex.org/W2082998226","https://openalex.org/W2102714480","https://openalex.org/W2117225622","https://openalex.org/W2137187346","https://openalex.org/W2152805927","https://openalex.org/W2153440728","https://openalex.org/W2156893245","https://openalex.org/W2156928086","https://openalex.org/W2217873082","https://openalex.org/W2284562765","https://openalex.org/W2293827470","https://openalex.org/W2411682725","https://openalex.org/W2521425463","https://openalex.org/W2528017044","https://openalex.org/W2544700275","https://openalex.org/W2624014132","https://openalex.org/W3103624025","https://openalex.org/W6602444040","https://openalex.org/W6638258730"],"related_works":["https://openalex.org/W3046462031","https://openalex.org/W2880089815","https://openalex.org/W2823163090","https://openalex.org/W1969210271","https://openalex.org/W2043275333","https://openalex.org/W1512518751","https://openalex.org/W2597665907","https://openalex.org/W2862073160","https://openalex.org/W2582209359","https://openalex.org/W2813272093","https://openalex.org/W2307115988","https://openalex.org/W2773338273","https://openalex.org/W3144386330","https://openalex.org/W2727824407","https://openalex.org/W2729022907","https://openalex.org/W2852426529","https://openalex.org/W2923267633","https://openalex.org/W2323584944","https://openalex.org/W2593322992","https://openalex.org/W2881800597"],"abstract_inverted_index":{"When":[0],"a":[1,4,8,98,108,185,189,308,342,352],"user":[2,12,68,132],"requests":[3],"web":[5,9,31,40,75,86,105,125,218,238],"page":[6,22,32,61,219,239],"from":[7,249,328,341,351],"archive,":[10],"the":[11,21,30,52,56,60,67,84,128,131,141,151,163,177,195,204,228,236,241,250,256,261,268,273,282,296,313,321,326,361,364],"will":[13,69],"typically":[14,43],"either":[15,58],"get":[16],"an":[17,26],"HTTP":[18,27,116,120],"200":[19,121],"if":[20,29,140,199],"is":[23,38,54,112,143,154,181],"available,":[24],"or":[25,62,148],"404":[28,117],"has":[33,59],"not":[34,70,134,155],"been":[35],"archived.":[36],"This":[37,111],"because":[39],"archives":[41],"are":[42,80,201,356],"accessed":[44],"by":[45,123],"Uniform":[46],"Resource":[47],"Identifier":[48],"(URI)":[49],"lookup,":[50],"and":[51,66,79,102,119,170,188,223,255,285,310,344,358],"response":[53],"binary:":[55],"archive":[57,129],"it":[63],"does":[64],"not,":[65],"know":[71,135],"of":[72,234,295,312,335],"other":[73],"archived":[74],"pages":[76,106,126],"that":[77,130,246,293,305],"exist":[78],"potentially":[81],"similar":[82],"to":[83,113,161,174,176,367],"requested":[85,152,237,365],"page.":[87],"In":[88,323],"this":[89],"paper,":[90],"we":[91,138,157,193,207,271],"propose":[92],"augmenting":[93],"these":[94],"binary":[95],"responses":[96,118,122],"with":[97,264,325],"model":[99],"for":[100,231,286,363],"selecting":[101],"ranking":[103],"recommended":[104,175],"in":[107,127,146,182,203,307,320],"Web":[109],"archive.":[110,205],"enhance":[114],"both":[115],"surfacing":[124],"may":[133,359],"existed.":[136],"First,":[137],"check":[139],"URI":[142,153,164,224,251,366],"already":[144],"classified":[145,298,315],"DMOZ":[147,166],"Wikipedia.":[149],"If":[150],"found,":[156],"use":[158],"machine":[159],"learning":[160],"classify":[162],"using":[165,247],"as":[167,215],"our":[168,329],"ontology":[169],"collect":[171],"candidate":[172],"URIs":[173,299,316,327,337],"user.":[178],"The":[179],"classification":[180,187,277],"two":[183],"parts,":[184],"first-level":[186],"deep":[190],"classification.":[191],"Next,":[192],"filter":[194],"candidates":[196,209],"based":[197,210],"on":[198,211],"they":[200],"present":[202],"Finally,":[206],"rank":[208],"several":[212],"features,":[213],"such":[214],"archival":[216],"quality,":[217],"popularity,":[220],"temporal":[221],"similarity,":[222],"similarity.":[225],"We":[226,244,290],"calculated":[227],"F1":[229,265],"score":[230],"different":[232],"methods":[233],"classifying":[235],"at":[240,275,301,347],"first":[242],"level.":[243,278],"found":[245,292],"all-grams":[248],"after":[252],"removing":[253],"numerals":[254],"top-level":[257],"domain":[258],"(TLD)":[259],"produced":[260],"best":[262],"result":[263],"=0.59.":[266],"For":[267,279],"deep-level":[269],"classification,":[270,281,288],"measured":[272],"accuracy":[274],"each":[276],"second-level":[280],"micro-average":[283],"F1=0.30":[284],"third-level":[287],"F1=0.15.":[289],"also":[291],"44.89%":[294],"correctly":[297,314,369],"contained":[300,317,338,346],"least":[302,348],"one":[303,349],"word":[304,350],"exists":[306],"dictionary":[309],"50.07%":[311],"long":[318],"strings":[319],"domain.":[322],"comparison":[324],"Wayback":[330],"access":[331],"logs,":[332],"only":[333,339],"5.39%":[334],"those":[336],"words":[340],"dictionary,":[343],"26.74%":[345],"dictionary.":[353],"These":[354],"percentages":[355],"low":[357],"affect":[360],"ability":[362],"be":[368],"classified.":[370]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
