{"id":"https://openalex.org/W153213769","doi":"https://doi.org/10.1145/2487788.2488118","title":"Creating a billion-scale searchable web archive","display_name":"Creating a billion-scale searchable web archive","publication_year":2013,"publication_date":"2013-05-13","ids":{"openalex":"https://openalex.org/W153213769","doi":"https://doi.org/10.1145/2487788.2488118","mag":"153213769"},"language":"en","primary_location":{"id":"doi:10.1145/2487788.2488118","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2487788.2488118","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd International Conference on World Wide Web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037253095","display_name":"Daniel Gomes","orcid":"https://orcid.org/0000-0002-5447-4581"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Daniel Gomes","raw_affiliation_strings":["Foundation for National Scientific Computing, Lisbon, Portugal"],"affiliations":[{"raw_affiliation_string":"Foundation for National Scientific Computing, Lisbon, Portugal","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055058678","display_name":"Miguel Costa","orcid":"https://orcid.org/0000-0002-0362-1713"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miguel Costa","raw_affiliation_strings":["Foundation for National Scientific Computing, Lisbon, Portugal"],"affiliations":[{"raw_affiliation_string":"Foundation for National Scientific Computing, Lisbon, Portugal","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051985119","display_name":"David Cruz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"David Cruz","raw_affiliation_strings":["Foundation for National Scientific Computing, Lisbon, Portugal"],"affiliations":[{"raw_affiliation_string":"Foundation for National Scientific Computing, Lisbon, Portugal","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112947618","display_name":"Jo\u00e3o Miranda","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jo\u00e3o Miranda","raw_affiliation_strings":["Foundation for National Scientific Computing, Lisbon, Portugal"],"affiliations":[{"raw_affiliation_string":"Foundation for National Scientific Computing, Lisbon, Portugal","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005825538","display_name":"Sim\u00e3o Fontes","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sim\u00e3o Fontes","raw_affiliation_strings":["Foundation for National Scientific Computing, Lisbon, Portugal"],"affiliations":[{"raw_affiliation_string":"Foundation for National Scientific Computing, Lisbon, Portugal","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5037253095"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.8489,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.95615724,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1059","last_page":"1066"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9851999878883362,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13976","display_name":"Web visibility and informetrics","score":0.970300018787384,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.8481976985931396},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7201734185218811},{"id":"https://openalex.org/keywords/web-development","display_name":"Web development","score":0.619546115398407},{"id":"https://openalex.org/keywords/data-web","display_name":"Data Web","score":0.5242950320243835},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.5189099907875061},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.5108215808868408},{"id":"https://openalex.org/keywords/web-service","display_name":"Web service","score":0.5107712745666504},{"id":"https://openalex.org/keywords/web-standards","display_name":"Web standards","score":0.5075558423995972},{"id":"https://openalex.org/keywords/web-modeling","display_name":"Web modeling","score":0.5038935542106628},{"id":"https://openalex.org/keywords/web-design","display_name":"Web design","score":0.45075979828834534},{"id":"https://openalex.org/keywords/web-mapping","display_name":"Web mapping","score":0.44758832454681396},{"id":"https://openalex.org/keywords/web-api","display_name":"Web API","score":0.4325377941131592},{"id":"https://openalex.org/keywords/web-search-engine","display_name":"Web search engine","score":0.4138903021812439}],"concepts":[{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.8481976985931396},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7201734185218811},{"id":"https://openalex.org/C79373723","wikidata":"https://www.wikidata.org/wiki/Q386275","display_name":"Web development","level":3,"score":0.619546115398407},{"id":"https://openalex.org/C162005631","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Data Web","level":3,"score":0.5242950320243835},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.5189099907875061},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.5108215808868408},{"id":"https://openalex.org/C35578498","wikidata":"https://www.wikidata.org/wiki/Q193424","display_name":"Web service","level":2,"score":0.5107712745666504},{"id":"https://openalex.org/C182321512","wikidata":"https://www.wikidata.org/wiki/Q1153289","display_name":"Web standards","level":3,"score":0.5075558423995972},{"id":"https://openalex.org/C130436687","wikidata":"https://www.wikidata.org/wiki/Q7978591","display_name":"Web modeling","level":3,"score":0.5038935542106628},{"id":"https://openalex.org/C521306242","wikidata":"https://www.wikidata.org/wiki/Q190637","display_name":"Web design","level":3,"score":0.45075979828834534},{"id":"https://openalex.org/C24733836","wikidata":"https://www.wikidata.org/wiki/Q649186","display_name":"Web mapping","level":4,"score":0.44758832454681396},{"id":"https://openalex.org/C127613066","wikidata":"https://www.wikidata.org/wiki/Q557770","display_name":"Web API","level":4,"score":0.4325377941131592},{"id":"https://openalex.org/C521815418","wikidata":"https://www.wikidata.org/wiki/Q4182287","display_name":"Web search engine","level":4,"score":0.4138903021812439}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2487788.2488118","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2487788.2488118","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd International Conference on World Wide Web","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.368.4924","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.368.4924","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://xldb.fc.ul.pt/xldb/publications/Gomes.etal:CreatingABillion-Scale:2013_document.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.401.9045","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.401.9045","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www2013.org/companion/p1059.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.4699999988079071,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W36877012","https://openalex.org/W74578451","https://openalex.org/W149193989","https://openalex.org/W599289711","https://openalex.org/W1482214997","https://openalex.org/W1525932556","https://openalex.org/W1527781663","https://openalex.org/W1551841399","https://openalex.org/W1973435495","https://openalex.org/W1984919363","https://openalex.org/W1985816353","https://openalex.org/W1992307783","https://openalex.org/W1993972354","https://openalex.org/W2044002869","https://openalex.org/W2065821053","https://openalex.org/W2097411811","https://openalex.org/W2099561919","https://openalex.org/W2108280221","https://openalex.org/W2127176025","https://openalex.org/W2160460392","https://openalex.org/W2162432120","https://openalex.org/W2295415819","https://openalex.org/W2404553285","https://openalex.org/W2767767850","https://openalex.org/W2911388033","https://openalex.org/W6601478358","https://openalex.org/W6603015088","https://openalex.org/W6628905179","https://openalex.org/W6683604222","https://openalex.org/W6713436754","https://openalex.org/W6746038542","https://openalex.org/W6893794334"],"related_works":["https://openalex.org/W2116470944","https://openalex.org/W1782470797","https://openalex.org/W3147180876","https://openalex.org/W1963685508","https://openalex.org/W2109885672","https://openalex.org/W2954487097","https://openalex.org/W2167278502","https://openalex.org/W2389737178","https://openalex.org/W4298403144","https://openalex.org/W2051477434"],"abstract_inverted_index":{"Web":[0,43,81],"information":[1,13,36],"is":[2,45,98],"ephemeral.":[3],"Several":[4],"organizations":[5,123],"around":[6],"the":[7,15,30,46,63,74,79],"world":[8],"are":[9],"struggling":[10],"to":[11,28,124],"archive":[12,51],"from":[14,62],"web":[16,39,50,64,85,129],"before":[17],"it":[18],"vanishes.":[19],"However,":[20],"users":[21],"demand":[22],"efficient":[23],"and":[24,91,115],"effective":[25],"search":[26,56,89],"mechanisms":[27],"access":[29],"already":[31],"vast":[32],"collections":[33],"of":[34,73],"historical":[35],"held":[37],"by":[38],"archives.":[40,130],"The":[41,95],"Portuguese":[42,80],"Archive":[44],"largest":[47],"full-text":[48],"searchable":[49],"publicly":[52],"available.":[53],"It":[54],"supports":[55],"over":[57],"1.2":[58],"billion":[59],"files":[60],"archived":[61],"since":[65],"1996.":[66],"This":[67],"study":[68],"contributes":[69],"with":[70],"an":[71,102],"overview":[72],"lessons":[75],"learned":[76],"while":[77,113],"developing":[78,114],"Archive,":[82],"focusing":[83],"on":[84],"data":[86],"acquisition,":[87],"ranking":[88],"results":[90],"user":[92],"interface":[93],"design.":[94],"developed":[96],"software":[97],"freely":[99],"available":[100],"as":[101],"open":[103],"source":[104],"project.":[105],"We":[106],"believe":[107],"that":[108],"sharing":[109],"our":[110],"experience":[111],"obtained":[112],"operating":[116],"a":[117],"running":[118],"service":[119],"will":[120],"enable":[121],"other":[122],"start":[125],"or":[126],"improve":[127],"their":[128]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2019,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
