{"id":"https://openalex.org/W1993603169","doi":"https://doi.org/10.1145/1851476.1851540","title":"Reshaping text data for efficient processing on Amazon EC2","display_name":"Reshaping text data for efficient processing on Amazon EC2","publication_year":2010,"publication_date":"2010-06-21","ids":{"openalex":"https://openalex.org/W1993603169","doi":"https://doi.org/10.1145/1851476.1851540","mag":"1993603169"},"language":"en","primary_location":{"id":"doi:10.1145/1851476.1851540","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1851476.1851540","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040809469","display_name":"Gabriela Turcu","orcid":null},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Gabriela Turcu","raw_affiliation_strings":["University of Chicago, Chicago, Illinois"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, Illinois","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032231503","display_name":"Ian Foster","orcid":"https://orcid.org/0000-0003-2129-5269"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ian Foster","raw_affiliation_strings":["University of Chicago, Chicago, Illinois"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, Illinois","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086089182","display_name":"Svetlozar Nestorov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Svetlozar Nestorov","raw_affiliation_strings":["Computation Institute, Chicago, Illinois"],"affiliations":[{"raw_affiliation_string":"Computation Institute, Chicago, Illinois","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5040809469"],"corresponding_institution_ids":["https://openalex.org/I40347166"],"apc_list":null,"apc_paid":null,"fwci":6.937,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.96642726,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"435","last_page":"444"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8991525173187256},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6661282777786255},{"id":"https://openalex.org/keywords/provisioning","display_name":"Provisioning","score":0.6130204796791077},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.537850558757782},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4805063009262085},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.47056102752685547},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4477699100971222},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.40294837951660156},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.36122655868530273},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3400397002696991},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1619599461555481},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.13598856329917908}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8991525173187256},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6661282777786255},{"id":"https://openalex.org/C172191483","wikidata":"https://www.wikidata.org/wiki/Q1071806","display_name":"Provisioning","level":2,"score":0.6130204796791077},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.537850558757782},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4805063009262085},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.47056102752685547},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4477699100971222},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.40294837951660156},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36122655868530273},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3400397002696991},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1619599461555481},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.13598856329917908},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1851476.1851540","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1851476.1851540","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W95608104","https://openalex.org/W1581344573","https://openalex.org/W1918699760","https://openalex.org/W2017733144","https://openalex.org/W2090244259","https://openalex.org/W2103128343","https://openalex.org/W2105854031","https://openalex.org/W2105908337","https://openalex.org/W2107894622","https://openalex.org/W2109537757","https://openalex.org/W2116358491","https://openalex.org/W2136717145","https://openalex.org/W2137296062","https://openalex.org/W2141782735","https://openalex.org/W2152862405","https://openalex.org/W2154158105","https://openalex.org/W2156165081","https://openalex.org/W2160402951","https://openalex.org/W2283571372","https://openalex.org/W2289411471","https://openalex.org/W3100379359","https://openalex.org/W4321637318"],"related_works":["https://openalex.org/W2366107444","https://openalex.org/W4388145910","https://openalex.org/W2893779165","https://openalex.org/W2381570729","https://openalex.org/W1976205134","https://openalex.org/W4248336175","https://openalex.org/W2017432143","https://openalex.org/W2785227142","https://openalex.org/W572531444","https://openalex.org/W2945962340"],"abstract_inverted_index":{"Text":[0],"analysis":[1],"tools":[2],"are":[3,12],"nowadays":[4],"required":[5],"to":[6,48,76,89,98,116],"process":[7],"increasingly":[8],"large":[9],"corpora":[10],"which":[11],"often":[13],"organized":[14],"as":[15,118,120],"small":[16,158],"files":[17,113],"(abstracts,":[18],"news":[19],"articles,":[20],"etc).":[21],"Cloud":[22],"computing":[23,29],"offers":[24],"a":[25,50,168],"convenient,":[26],"on-demand,":[27],"pay-as-you-go":[28],"environment":[30],"for":[31,95],"solving":[32],"such":[33],"problems.":[34],"We":[35,60],"investigate":[36],"provisioning":[37],"on":[38,62,71,155,157],"the":[39,44,63,67,101,108,122,130,134,141,149],"Amazon":[40],"EC2":[41],"cloud":[42],"from":[43],"user":[45,169],"perspective,":[46],"attempting":[47],"provide":[49],"scheduling":[51],"strategy":[52],"that":[53,166],"is":[54,88],"both":[55],"timely":[56],"and":[57],"cost":[58],"effective.":[59],"rely":[61],"empirical":[64],"performance":[65,86,150],"of":[66,69,74,84,132,136,148,151],"application":[68,97,153],"interest":[70],"smaller":[72],"subsets":[73],"data,":[75],"construct":[77],"an":[78,91,163],"execution":[79,164],"plan.":[80],"A":[81],"first":[82,103],"goal":[83],"our":[85,96,137,152],"measurements":[87,156],"determine":[90],"optimal":[92],"file":[93,124],"size":[94],"consume.":[99],"Using":[100,146],"subset-sum":[102],"fit":[104],"heuristic":[105],"we":[106,161],"reshape":[107],"input":[109],"data":[110,159],"by":[111,139],"merging":[112],"in":[114],"order":[115],"match":[117],"closely":[119],"possible":[121],"desired":[123],"size.":[125],"This":[126],"also":[127],"speeds":[128],"up":[129],"task":[131],"retrieving":[133],"results":[135],"application,":[138],"having":[140],"output":[142],"be":[143],"less":[144],"segmented.":[145],"predictions":[147],"based":[154],"sets,":[160],"devise":[162],"plan":[165],"meets":[167],"specified":[170],"deadline":[171],"while":[172],"minimizing":[173],"cost.":[174]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":5},{"year":2012,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
