{"id":"https://openalex.org/W3090621879","doi":"https://doi.org/10.1145/2063384.2063463","title":"A distributed look-up architecture for text mining applications using MapReduce","display_name":"A distributed look-up architecture for text mining applications using MapReduce","publication_year":2011,"publication_date":"2011-11-08","ids":{"openalex":"https://openalex.org/W3090621879","doi":"https://doi.org/10.1145/2063384.2063463","mag":"3090621879"},"language":"en","primary_location":{"id":"doi:10.1145/2063384.2063463","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2063384.2063463","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086058099","display_name":"Atilla Soner Balkir","orcid":null},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Atilla Soner Balkir","raw_affiliation_strings":["University of Chicago, Chicago, IL"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, IL","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032231503","display_name":"Ian Foster","orcid":"https://orcid.org/0000-0003-2129-5269"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ian Foster","raw_affiliation_strings":["University of Chicago, Chicago, IL"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, IL","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004739989","display_name":"Andrey Rzhetsky","orcid":"https://orcid.org/0000-0001-6959-7405"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrey Rzhetsky","raw_affiliation_strings":["University of Chicago, Chicago, IL"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, IL","institution_ids":["https://openalex.org/I40347166"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5086058099"],"corresponding_institution_ids":["https://openalex.org/I40347166"],"apc_list":null,"apc_paid":null,"fwci":0.7002,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.76167107,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.909098744392395},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.5766786336898804},{"id":"https://openalex.org/keywords/replicate","display_name":"Replicate","score":0.4585312604904175},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.393848717212677},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.378410667181015},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.33590075373649597},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16281098127365112}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.909098744392395},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.5766786336898804},{"id":"https://openalex.org/C2781162219","wikidata":"https://www.wikidata.org/wiki/Q26250693","display_name":"Replicate","level":2,"score":0.4585312604904175},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.393848717212677},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.378410667181015},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.33590075373649597},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16281098127365112},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2063384.2063463","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2063384.2063463","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.550000011920929,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2547184436","display_name":null,"funder_award_id":"R01LM010132","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320309626","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W145937141","https://openalex.org/W1976860187","https://openalex.org/W2006969979","https://openalex.org/W2031302834","https://openalex.org/W2102130607","https://openalex.org/W2109664771","https://openalex.org/W2109722477","https://openalex.org/W2122228338","https://openalex.org/W2122410182","https://openalex.org/W2123845384","https://openalex.org/W2127797489","https://openalex.org/W2132399973","https://openalex.org/W2136579661","https://openalex.org/W2140551944","https://openalex.org/W2145766604","https://openalex.org/W2148212498","https://openalex.org/W2167927436","https://openalex.org/W2173213060","https://openalex.org/W2189465200","https://openalex.org/W2412336808","https://openalex.org/W2624304035","https://openalex.org/W2950627632","https://openalex.org/W2998215494","https://openalex.org/W4242589337","https://openalex.org/W4254365197","https://openalex.org/W6602009598","https://openalex.org/W6676367512","https://openalex.org/W6676373471"],"related_works":["https://openalex.org/W4254851101","https://openalex.org/W3171007296","https://openalex.org/W22115721","https://openalex.org/W2211931904","https://openalex.org/W2065444835","https://openalex.org/W2321234655","https://openalex.org/W2952773340","https://openalex.org/W2470062578","https://openalex.org/W2981861370","https://openalex.org/W2982204590"],"abstract_inverted_index":{"We":[0,101],"study":[1],"text":[2],"analysis":[3,55],"algorithms":[4],"that":[5,14,35,92],"use":[6,118],"global":[7],"optimization":[8],"methods":[9,62,73],"to":[10,66,83,89,136,153],"compute":[11],"local":[12],"characteristics":[13],"are":[15],"consistent":[16],"with":[17],"properties":[18],"of":[19,46,56,150],"the":[20,32,99,111],"entire":[21],"corpus":[22],"rather":[23],"than":[24],"computed":[25],"locally":[26],"based":[27],"on":[28,69],"exogenous":[29],"parameters.":[30],"In":[31,139],"iterative":[33],"implementations":[34],"we":[36,59,144],"consider,":[37],"each":[38],"step":[39],"both":[40],"reads":[41],"and":[42,79,115,131],"updates":[43],"a":[44,51,148],"database":[45],"parameter":[47],"values.":[48],"Motivated":[49],"by":[50,147],"need":[52],"for":[53,63],"rapid":[54],"large":[57],"corpora,":[58],"have":[60],"developed":[61],"efficient":[64],"access":[65],"such":[67],"databases":[68],"parallel":[70],"computers.":[71],"These":[72],"combine":[74],"Bloom":[75],"filters,":[76],"in-memory":[77],"caches,":[78],"an":[80,154],"HBase":[81],"cluster":[82],"reduce":[84],"communication":[85],"costs":[86],"greatly":[87],"relative":[88,135,152],"simpler":[90],"approaches":[91],"either":[93],"fully":[94,97],"distribute":[95],"or":[96],"replicate":[98],"database.":[100],"also":[102],"describe":[103],"how":[104],"this":[105],"method":[106],"can":[107,125],"be":[108],"incorporated":[109],"into":[110],"MapReduce":[112],"programming":[113],"model,":[114],"illustrate":[116],"its":[117],"within":[119],"phrase":[120,141],"segmentation":[121,142],"programs.":[122],"Our":[123],"design":[124],"achieve":[126],"considerable":[127],"run":[128],"time,":[129],"latency":[130],"storage":[132],"space":[133],"improvements":[134],"other":[137],"methods.":[138],"one":[140],"application,":[143],"improve":[145],"performance":[146],"factor":[149],"six":[151],"HBase-based":[155],"implementation.":[156]},"counts_by_year":[{"year":2019,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
