{"id":"https://openalex.org/W2061986359","doi":"https://doi.org/10.1145/1645953.1646008","title":"Compact full-text indexing of versioned document collections","display_name":"Compact full-text indexing of versioned document collections","publication_year":2009,"publication_date":"2009-11-02","ids":{"openalex":"https://openalex.org/W2061986359","doi":"https://doi.org/10.1145/1645953.1646008","mag":"2061986359"},"language":"en","primary_location":{"id":"doi:10.1145/1645953.1646008","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1645953.1646008","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM conference on Information and knowledge management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012143646","display_name":"Jinru He","orcid":null},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]},{"id":"https://openalex.org/I90965887","display_name":"SUNY Polytechnic Institute","ror":"https://ror.org/000fxgx19","country_code":"US","type":"education","lineage":["https://openalex.org/I90965887"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jinru He","raw_affiliation_strings":["Polytechnic Institute of NYU, Brooklyn, NY, USA","Polytechnic institute of NYU, Brooklyn, NY, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Polytechnic Institute of NYU, Brooklyn, NY, USA","institution_ids":["https://openalex.org/I90965887"]},{"raw_affiliation_string":"Polytechnic institute of NYU, Brooklyn, NY, USA#TAB#","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101914876","display_name":"Hao Yan","orcid":"https://orcid.org/0000-0002-7468-528X"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]},{"id":"https://openalex.org/I90965887","display_name":"SUNY Polytechnic Institute","ror":"https://ror.org/000fxgx19","country_code":"US","type":"education","lineage":["https://openalex.org/I90965887"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hao Yan","raw_affiliation_strings":["Polytechnic Institute of NYU, Brooklyn, NY, USA","Polytechnic institute of NYU, Brooklyn, NY, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Polytechnic Institute of NYU, Brooklyn, NY, USA","institution_ids":["https://openalex.org/I90965887"]},{"raw_affiliation_string":"Polytechnic institute of NYU, Brooklyn, NY, USA#TAB#","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074323303","display_name":"Torsten Suel","orcid":"https://orcid.org/0000-0002-8324-980X"},"institutions":[{"id":"https://openalex.org/I90965887","display_name":"SUNY Polytechnic Institute","ror":"https://ror.org/000fxgx19","country_code":"US","type":"education","lineage":["https://openalex.org/I90965887"]},{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Torsten Suel","raw_affiliation_strings":["Polytechnic Institute of NYU, Brooklyn, NY, USA","Polytechnic institute of NYU, Brooklyn, NY, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Polytechnic Institute of NYU, Brooklyn, NY, USA","institution_ids":["https://openalex.org/I90965887"]},{"raw_affiliation_string":"Polytechnic institute of NYU, Brooklyn, NY, USA#TAB#","institution_ids":["https://openalex.org/I57206974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5012143646"],"corresponding_institution_ids":["https://openalex.org/I57206974","https://openalex.org/I90965887"],"apc_list":null,"apc_paid":null,"fwci":5.2342,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.95459892,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"415","last_page":"424"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10742","display_name":"Peer-to-Peer Network Technologies","score":0.9830999970436096,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.8497840166091919},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8326994180679321},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.8241163492202759},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.7043402791023254},{"id":"https://openalex.org/keywords/inverted-index","display_name":"Inverted index","score":0.6902623176574707},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6828957796096802},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.5765778422355652},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.48198646306991577},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.44314077496528625},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.43080177903175354},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.20963111519813538},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1411486566066742}],"concepts":[{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.8497840166091919},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8326994180679321},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.8241163492202759},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.7043402791023254},{"id":"https://openalex.org/C130590232","wikidata":"https://www.wikidata.org/wiki/Q1671754","display_name":"Inverted index","level":3,"score":0.6902623176574707},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6828957796096802},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.5765778422355652},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.48198646306991577},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.44314077496528625},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.43080177903175354},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.20963111519813538},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1411486566066742},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1645953.1646008","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1645953.1646008","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM conference on Information and knowledge management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4099999964237213,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W38366299","https://openalex.org/W192724328","https://openalex.org/W841409518","https://openalex.org/W1483194439","https://openalex.org/W1524501441","https://openalex.org/W1556744446","https://openalex.org/W1559631118","https://openalex.org/W1562093331","https://openalex.org/W1576397915","https://openalex.org/W1669813703","https://openalex.org/W1964338930","https://openalex.org/W1969838114","https://openalex.org/W1972418517","https://openalex.org/W1975868314","https://openalex.org/W2020418493","https://openalex.org/W2042130547","https://openalex.org/W2044492449","https://openalex.org/W2052867877","https://openalex.org/W2056980397","https://openalex.org/W2076471773","https://openalex.org/W2082496345","https://openalex.org/W2111215543","https://openalex.org/W2111295912","https://openalex.org/W2115810705","https://openalex.org/W2132069633","https://openalex.org/W2132627996","https://openalex.org/W2138662031","https://openalex.org/W2140453381","https://openalex.org/W2152437528","https://openalex.org/W2156417525","https://openalex.org/W2169189540","https://openalex.org/W2170907470","https://openalex.org/W4250366158"],"related_works":["https://openalex.org/W2058987221","https://openalex.org/W2145657320","https://openalex.org/W2102493899","https://openalex.org/W2388346754","https://openalex.org/W4282568653","https://openalex.org/W2187610212","https://openalex.org/W2123678380","https://openalex.org/W180545237","https://openalex.org/W1837234277","https://openalex.org/W2093377061"],"abstract_inverted_index":{"We":[0,109],"study":[1],"the":[2,33,39,63,76,83,120,124,132,142],"problem":[3],"of":[4,22,27,65,82,116,131,135,141],"creating":[5],"highly":[6],"compressed":[7],"full-text":[8],"index":[9,58,90,104],"structures":[10,105],"for":[11,99,106],"versioned":[12],"document":[13,50,85],"collections,":[14],"that":[15,18,57,74],"is,":[16],"collections":[17,29],"contain":[19],"multiple":[20],"versions":[21,81],"each":[23,49],"document.":[24],"Important":[25],"examples":[26],"such":[28,56,107],"are":[30],"Wikipedia":[31],"or":[32],"web":[34],"page":[35],"archive":[36,130],"maintained":[37],"by":[38],"Internet":[40,143],"Archive.":[41],"A":[42],"straightforward":[43],"indexing":[44],"approach":[45],"would":[46],"simply":[47],"treat":[48],"version":[51,134],"as":[52],"a":[53,112,139],"separate":[54],"document,":[55],"size":[59],"scales":[60],"linearly":[61],"with":[62],"number":[64],"versions.":[66],"However,":[67],"several":[68],"authors":[69],"have":[70],"recently":[71],"studied":[72],"approaches":[73],"exploit":[75],"significant":[77,147],"similarities":[78],"between":[79],"different":[80],"same":[84],"to":[86],"obtain":[87],"much":[88],"smaller":[89],"sizes.":[91],"In":[92],"this":[93],"paper,":[94],"we":[95],"propose":[96],"new":[97,117],"techniques":[98,118,122],"organizing":[100],"and":[101,119,137],"compressing":[102],"inverted":[103],"collections.":[108],"also":[110],"perform":[111],"detailed":[113],"experimental":[114],"comparison":[115],"existing":[121],"in":[123],"literature.":[125],"Our":[126],"results":[127],"on":[128,138],"an":[129],"English":[133],"Wikipedia,":[136],"subset":[140],"Archive":[144],"collection,":[145],"show":[146],"benefits":[148],"over":[149],"previous":[150],"approaches.":[151]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
