{"id":"https://openalex.org/W2085036300","doi":"https://doi.org/10.1145/2597652.2597673","title":"Effective automatic computation placement and data allocation for parallelization of regular programs","display_name":"Effective automatic computation placement and data allocation for parallelization of regular programs","publication_year":2014,"publication_date":"2014-06-10","ids":{"openalex":"https://openalex.org/W2085036300","doi":"https://doi.org/10.1145/2597652.2597673","mag":"2085036300"},"language":"en","primary_location":{"id":"doi:10.1145/2597652.2597673","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2597652.2597673","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM international conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109389309","display_name":"Chandan Reddy","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Chandan Reddy","raw_affiliation_strings":["Indian Institute of Science, Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025342111","display_name":"Uday Bondhugula","orcid":"https://orcid.org/0000-0002-8297-6159"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Uday Bondhugula","raw_affiliation_strings":["Indian Institute of Science, Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5109389309"],"corresponding_institution_ids":["https://openalex.org/I59270414"],"apc_list":null,"apc_paid":null,"fwci":2.7583,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.90553486,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"13","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8651579022407532},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.836951494216919},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7899644374847412},{"id":"https://openalex.org/keywords/loop-tiling","display_name":"Loop tiling","score":0.6511757373809814},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5949431657791138},{"id":"https://openalex.org/keywords/distributed-memory","display_name":"Distributed memory","score":0.5402511954307556},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5227741599082947},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.49781274795532227},{"id":"https://openalex.org/keywords/nested-loop-join","display_name":"Nested loop join","score":0.470106303691864},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.4235702455043793},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.36951661109924316},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.3561588525772095},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3307439088821411},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.275438129901886},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.263433575630188},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11491972208023071}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8651579022407532},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.836951494216919},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7899644374847412},{"id":"https://openalex.org/C11799548","wikidata":"https://www.wikidata.org/wiki/Q6675847","display_name":"Loop tiling","level":3,"score":0.6511757373809814},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5949431657791138},{"id":"https://openalex.org/C91481028","wikidata":"https://www.wikidata.org/wiki/Q1054686","display_name":"Distributed memory","level":3,"score":0.5402511954307556},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5227741599082947},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.49781274795532227},{"id":"https://openalex.org/C1306188","wikidata":"https://www.wikidata.org/wiki/Q4060687","display_name":"Nested loop join","level":2,"score":0.470106303691864},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.4235702455043793},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.36951661109924316},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.3561588525772095},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3307439088821411},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.275438129901886},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.263433575630188},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11491972208023071},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2597652.2597673","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2597652.2597673","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM international conference on Supercomputing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1509197282","https://openalex.org/W1970141743","https://openalex.org/W1999360210","https://openalex.org/W2002897809","https://openalex.org/W2005999583","https://openalex.org/W2008117760","https://openalex.org/W2029687105","https://openalex.org/W2070922326","https://openalex.org/W2076517649","https://openalex.org/W2085844157","https://openalex.org/W2102056800","https://openalex.org/W2114665465","https://openalex.org/W2128249697","https://openalex.org/W2153401099","https://openalex.org/W2340604309","https://openalex.org/W3203568064"],"related_works":["https://openalex.org/W2112391112","https://openalex.org/W4254171597","https://openalex.org/W181286935","https://openalex.org/W1577903309","https://openalex.org/W2342605656","https://openalex.org/W2159360349","https://openalex.org/W1543710109","https://openalex.org/W2604754884","https://openalex.org/W2583056912","https://openalex.org/W2107219487"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"techniques":[3,88],"for":[4,16,19,63,93,102,131,170,178],"data":[5,70,103,110,173,200],"allocation":[6,71,97,104],"and":[7,21,25,96,118,123],"computation":[8,47,91,132],"mapping":[9,133],"when":[10],"compiling":[11],"affine":[12],"loop":[13,65],"nest":[14],"sequences":[15,151],"distributed-memory":[17],"clusters.Techniques":[18],"transformation":[20],"detection":[22],"of":[23,27,98,109,152,159,193],"parallelism,":[24],"generation":[26],"communication":[28,61],"sets":[29],"relying":[30],"on":[31,121,149,183],"the":[32,69],"polyhedral":[33],"framework":[34],"already":[35],"exist.":[36],"However,":[37],"these":[38,82],"recent":[39],"approaches":[40],"used":[41,73],"a":[42,114,156,184,190],"simple":[43],"strategy":[44,72],"to":[45,48,59,89,116],"map":[46],"nodes":[49],"--":[50],"typically":[51],"block":[52],"or":[53],"block-cyclic.":[54],"These":[55],"mappings":[56,137],"may":[57],"lead":[58],"excess":[60],"volume":[62],"multiple":[64],"nests.":[66],"In":[67],"addition,":[68],"did":[74],"not":[75,199],"permit":[76],"efficient":[77],"weak":[78,168],"scaling.":[79],"We":[80,126],"address":[81],"complementary":[83],"problems":[84],"by":[85,107],"proposing":[86],"automatic":[87],"determine":[90],"placements":[92],"identified":[94],"parallelism":[95],"data.":[99],"Our":[100],"approach":[101,130],"is":[105,198],"driven":[106],"tiling":[108,174],"spaces":[111],"along":[112],"with":[113,164],"scheme":[115],"allocate":[117],"deallocate":[119],"tiles":[120],"demand":[122],"reuse":[124],"them.":[125],"show":[127],"that":[128,140,197],"our":[129],"yields":[134],"more":[135],"effective":[136],"than":[138],"those":[139],"can":[141],"be":[142],"developed":[143],"using":[144],"vendor-supplied":[145],"libraries.":[146],"Experimental":[147,181],"results":[148,182],"some":[150],"BLAS":[153],"calls":[154],"demonstrate":[155],"mean":[157,191],"speedup":[158,192],"1.82x":[160],"over":[161,195],"versions":[162],"written":[163],"ScaLAPACK.":[165],"Besides":[166],"enabling":[167],"scaling":[169],"distributed":[171],"memory,":[172],"also":[175],"improves":[176],"locality":[177],"shared-memory":[179,186],"parallelization.":[180],"32-core":[185],"SMP":[187],"system":[188],"shows":[189],"2.67x":[194],"code":[196],"tiled.":[201]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
