{"id":"https://openalex.org/W2038879656","doi":"https://doi.org/10.1145/1779599.1779600","title":"Distributed indexing of web scale datasets for the cloud","display_name":"Distributed indexing of web scale datasets for the cloud","publication_year":2010,"publication_date":"2010-04-26","ids":{"openalex":"https://openalex.org/W2038879656","doi":"https://doi.org/10.1145/1779599.1779600","mag":"2038879656"},"language":"en","primary_location":{"id":"doi:10.1145/1779599.1779600","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1779599.1779600","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2010 Workshop on Massive Data Analytics on the Cloud","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://doi.org/10.1145/1779599.1779600","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004421134","display_name":"Ioannis Konstantinou","orcid":"https://orcid.org/0000-0002-7142-8106"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Ioannis Konstantinou","raw_affiliation_strings":["National Technical University of Athens","National Technical University Of Athens"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens","institution_ids":["https://openalex.org/I174458059"]},{"raw_affiliation_string":"National Technical University Of Athens","institution_ids":["https://openalex.org/I174458059"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033997260","display_name":"Evangelos Angelou","orcid":"https://orcid.org/0009-0001-6542-9647"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Evangelos Angelou","raw_affiliation_strings":["National Technical University of Athens","National Technical University Of Athens"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens","institution_ids":["https://openalex.org/I174458059"]},{"raw_affiliation_string":"National Technical University Of Athens","institution_ids":["https://openalex.org/I174458059"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005628435","display_name":"Dimitrios Tsoumakos","orcid":"https://orcid.org/0000-0003-4420-8949"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Dimitrios Tsoumakos","raw_affiliation_strings":["National Technical University of Athens","National Technical University Of Athens"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens","institution_ids":["https://openalex.org/I174458059"]},{"raw_affiliation_string":"National Technical University Of Athens","institution_ids":["https://openalex.org/I174458059"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023526161","display_name":"Nectarios Koziris","orcid":"https://orcid.org/0000-0002-4890-8427"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Nectarios Koziris","raw_affiliation_strings":["National Technical University of Athens","National Technical University Of Athens"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens","institution_ids":["https://openalex.org/I174458059"]},{"raw_affiliation_string":"National Technical University Of Athens","institution_ids":["https://openalex.org/I174458059"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5004421134"],"corresponding_institution_ids":["https://openalex.org/I174458059"],"apc_list":null,"apc_paid":null,"fwci":11.0993,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.9812908,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/nosql","display_name":"NoSQL","score":0.9079948663711548},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.87324059009552},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.8329038619995117},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.7188442349433899},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.658121645450592},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.4766070246696472},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.46148931980133057},{"id":"https://openalex.org/keywords/distributed-data-store","display_name":"Distributed data store","score":0.4465677738189697},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4440114498138428},{"id":"https://openalex.org/keywords/distributed-database","display_name":"Distributed database","score":0.4423828721046448},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4357650578022003},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.36810797452926636},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.35794901847839355},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1931404173374176}],"concepts":[{"id":"https://openalex.org/C2779599972","wikidata":"https://www.wikidata.org/wiki/Q82231","display_name":"NoSQL","level":3,"score":0.9079948663711548},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.87324059009552},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.8329038619995117},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.7188442349433899},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.658121645450592},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.4766070246696472},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.46148931980133057},{"id":"https://openalex.org/C24885549","wikidata":"https://www.wikidata.org/wiki/Q339678","display_name":"Distributed data store","level":2,"score":0.4465677738189697},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4440114498138428},{"id":"https://openalex.org/C70061542","wikidata":"https://www.wikidata.org/wiki/Q989016","display_name":"Distributed database","level":2,"score":0.4423828721046448},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4357650578022003},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36810797452926636},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.35794901847839355},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1931404173374176},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1779599.1779600","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1779599.1779600","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2010 Workshop on Massive Data Analytics on the Cloud","raw_type":"proceedings-article"},{"id":"pmh:oai:dspace.lib.ntua.gr:123456789/32831","is_oa":true,"landing_page_url":"http://doi.org/10.1145/1779599.1779600","pdf_url":null,"source":{"id":"https://openalex.org/S4306500159","display_name":"ACM International Conference Proceeding Series","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"conference"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM International Conference Proceeding Series","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:dspace.lib.ntua.gr:123456789/32831","is_oa":true,"landing_page_url":"http://doi.org/10.1145/1779599.1779600","pdf_url":null,"source":{"id":"https://openalex.org/S4306500159","display_name":"ACM International Conference Proceeding Series","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"conference"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM International Conference Proceeding Series","raw_type":"info:eu-repo/semantics/conferenceObject"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.47999998927116394}],"awards":[{"id":"https://openalex.org/G8881705538","display_name":null,"funder_award_id":"FP6-34363","funder_id":"https://openalex.org/F4320334962","funder_display_name":"Sixth Framework Programme"}],"funders":[{"id":"https://openalex.org/F4320334962","display_name":"Sixth Framework Programme","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W10197252","https://openalex.org/W224439989","https://openalex.org/W1261868982","https://openalex.org/W1981420413","https://openalex.org/W2087946700","https://openalex.org/W2098935637","https://openalex.org/W2110086534","https://openalex.org/W2119565742","https://openalex.org/W2125775320","https://openalex.org/W2141788659","https://openalex.org/W2153704625","https://openalex.org/W2173213060"],"related_works":["https://openalex.org/W2799973158","https://openalex.org/W2419153746","https://openalex.org/W3089119258","https://openalex.org/W2923327995","https://openalex.org/W2518340158","https://openalex.org/W3129662905","https://openalex.org/W2084547067","https://openalex.org/W3081448375","https://openalex.org/W1834679979","https://openalex.org/W2353225269"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"present":[4],"a":[5,32,39],"distributed":[6],"architecture":[7],"for":[8],"indexing":[9,42,79],"and":[10,13,18,29,48,58,63],"serving":[11],"large":[12],"diverse":[14],"datasets.":[15],"It":[16],"incorporates":[17],"extends":[19],"the":[20,24,55,91,107],"functionality":[21],"of":[22,30,51,60,81,99,109],"Hadoop,":[23],"open":[25],"source":[26],"MapReduce":[27],"framework,":[28],"HBase,":[31],"distributed,":[33],"sparse,":[34],"NoSQL":[35],"database,":[36],"to":[37,76],"create":[38],"fully":[40],"parallel":[41],"system.":[43],"Experiments":[44],"with":[45,66],"structured,":[46],"semi-structured":[47],"unstructured":[49],"data":[50],"various":[52],"sizes":[53],"demonstrate":[54],"flexibility,":[56],"speed":[57],"robustness":[59],"our":[61],"implementation":[62],"contrast":[64],"it":[65],"similarly":[67],"oriented":[68],"projects.":[69],"Our":[70],"11":[71],"node":[72],"cluster":[73],"prototype":[74],"managed":[75],"keep":[77],"full-text":[78],"time":[80,94],"150GB":[82],"raw":[83],"content":[84],"in":[85,106],"less":[86],"than":[87,101],"3":[88],"hours,":[89],"whereas":[90],"system's":[92],"response":[93],"under":[95],"sustained":[96],"query":[97],"load":[98],"more":[100],"1000":[102],"queries/sec":[103],"was":[104],"kept":[105],"order":[108],"milliseconds.":[110]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
