{"id":"https://openalex.org/W2996123223","doi":"https://doi.org/10.1145/3368304","title":"Nested MIMD-SIMD Parallelization for Heterogeneous Microprocessors","display_name":"Nested MIMD-SIMD Parallelization for Heterogeneous Microprocessors","publication_year":2019,"publication_date":"2019-12-17","ids":{"openalex":"https://openalex.org/W2996123223","doi":"https://doi.org/10.1145/3368304","mag":"2996123223"},"language":"en","primary_location":{"id":"doi:10.1145/3368304","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3368304","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3368304","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3368304","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087383979","display_name":"Daniel Gerzhoy","orcid":"https://orcid.org/0000-0002-4277-9994"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Daniel Gerzhoy","raw_affiliation_strings":["University of Maryland at College Park, College Park, MD"],"raw_orcid":"https://orcid.org/0000-0002-4277-9994","affiliations":[{"raw_affiliation_string":"University of Maryland at College Park, College Park, MD","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073797317","display_name":"Xiaowu Sun","orcid":"https://orcid.org/0000-0003-0274-8955"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaowu Sun","raw_affiliation_strings":["University of Maryland at College Park, College Park, MD"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Maryland at College Park, College Park, MD","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077915520","display_name":"Michael Zuzak","orcid":"https://orcid.org/0000-0003-0356-9393"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Zuzak","raw_affiliation_strings":["University of Maryland at College Park, College Park, MD"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Maryland at College Park, College Park, MD","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042239243","display_name":"Donald Yeung","orcid":"https://orcid.org/0000-0003-0341-2644"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Donald Yeung","raw_affiliation_strings":["University of Maryland at College Park, College Park, MD"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Maryland at College Park, College Park, MD","institution_ids":["https://openalex.org/I66946132"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5087383979"],"corresponding_institution_ids":["https://openalex.org/I66946132"],"apc_list":null,"apc_paid":null,"fwci":0.7404,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.70891365,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"16","issue":"4","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8896356821060181},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8731037378311157},{"id":"https://openalex.org/keywords/mimd","display_name":"MIMD","score":0.8467402458190918},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.7497439384460449},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7096412777900696},{"id":"https://openalex.org/keywords/nested-loop-join","display_name":"Nested loop join","score":0.4993898868560791},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.43488311767578125}],"concepts":[{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8896356821060181},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8731037378311157},{"id":"https://openalex.org/C21032095","wikidata":"https://www.wikidata.org/wiki/Q1149237","display_name":"MIMD","level":2,"score":0.8467402458190918},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.7497439384460449},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7096412777900696},{"id":"https://openalex.org/C1306188","wikidata":"https://www.wikidata.org/wiki/Q4060687","display_name":"Nested loop join","level":2,"score":0.4993898868560791},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.43488311767578125}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3368304","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3368304","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3368304","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3368304","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3368304","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3368304","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.49000000953674316,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2996123223.pdf","grobid_xml":"https://content.openalex.org/works/W2996123223.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W309890564","https://openalex.org/W1495954130","https://openalex.org/W1865312937","https://openalex.org/W1969863734","https://openalex.org/W1979147374","https://openalex.org/W1979717209","https://openalex.org/W1996353004","https://openalex.org/W2065392434","https://openalex.org/W2078994750","https://openalex.org/W2090278477","https://openalex.org/W2106562406","https://openalex.org/W2114067856","https://openalex.org/W2115711516","https://openalex.org/W2123440268","https://openalex.org/W2165039583","https://openalex.org/W2169049902","https://openalex.org/W2171641226","https://openalex.org/W2178430749","https://openalex.org/W2345855859","https://openalex.org/W2899265977","https://openalex.org/W2912560299","https://openalex.org/W2914237411","https://openalex.org/W2914861687","https://openalex.org/W2914935709","https://openalex.org/W3155136669","https://openalex.org/W4255716867","https://openalex.org/W6675771560","https://openalex.org/W6681969235"],"related_works":["https://openalex.org/W1980986440","https://openalex.org/W3158884034","https://openalex.org/W4388314704","https://openalex.org/W1971361763","https://openalex.org/W2014030893","https://openalex.org/W2983282793","https://openalex.org/W2167983067","https://openalex.org/W2065177255","https://openalex.org/W2125719717","https://openalex.org/W2099629705"],"abstract_inverted_index":{"Heterogeneous":[0],"microprocessors":[1],"integrate":[2],"a":[3,27,99,181,190,193,230],"CPU":[4,60,119],"and":[5,15,36,46,61,140,161,183,192,204,207,212,232,238],"GPU":[6,62,134,138],"on":[7,20,32,117,132,189],"the":[8,33,38,59,114,125,133,142,217,236],"same":[9],"chip,":[10],"providing":[11],"fast":[12],"CPU-GPU":[13,80,159],"communication":[14],"enabling":[16],"cores":[17],"to":[18,66,167,216],"compute":[19],"data":[21,93],"\u201cin":[22],"place.\u201d":[23],"This":[24,136],"permits":[25],"exploiting":[26,75],"finer":[28],"granularity":[29],"of":[30,40,124,155],"parallelism":[31,55,77,156],"integrated":[34,79,158],"GPUs,":[35],"enables":[37],"use":[39],"GPUs":[41],"for":[42,78,84,157,235],"accelerating":[43],"more":[44,91],"complex":[45],"irregular":[47,106],"codes.":[48],"One":[49],"challenge,":[50],"however,":[51],"is":[52,164],"exposing":[53],"enough":[54],"such":[56,148,169],"that":[57,103,147],"both":[58],"are":[63,96],"effectively":[64],"utilized":[65],"achieve":[67],"maximum":[68],"gain.":[69],"In":[70],"this":[71],"article,":[72],"we":[73],"propose":[74],"nested":[76,97,149,177],"chips.":[81],"We":[82,145],"look":[83],"loop":[85,102,116],"structures":[86],"in":[87,171],"which":[88],"one":[89],"or":[90,222],"regular":[92,127],"parallel":[94,100],"loops":[95],"within":[98],"outer":[101,115,143],"can":[104,129],"contain":[105],"code":[107],"(e.g.,":[108],"with":[109],"control":[110],"divergence).":[111],"By":[112],"scheduling":[113],"multiple":[118,121],"cores,":[120],"dynamic":[122],"instances":[123],"inner":[126],"loop(s)":[128],"be":[130],"scheduled":[131],"cores.":[135],"boosts":[137],"utilization":[139],"parallelizes":[141],"loop.":[144],"find":[146],"MIMD-SIMD":[150,178],"parallelization":[151,170,179,201,209],"provides":[152,180,229],"greater":[153],"levels":[154],"chips,":[160],"additionally":[162],"there":[163],"ample":[165],"opportunity":[166],"perform":[168],"OpenMP":[172],"programs.":[173],"Our":[174,197],"results":[175],"show":[176],"16.1x":[182],"8.67x":[184],"speedup":[185,234],"over":[186],"sequential":[187],"execution":[188],"simulator":[191,237],"physical":[194,239],"machine,":[195,240],"respectively.":[196,214,241],"technique":[198],"beats":[199],"CPU-only":[200],"by":[202,210],"4.13x":[203],"2.40x,":[205],"respectively,":[206],"GPU-only":[208,223],"2.74x":[211],"2.26x,":[213],"Compared":[215],"next-best":[218],"scheme":[219],"(either":[220],"CPU-":[221],"parallelization)":[224],"per":[225],"benchmark,":[226],"our":[227],"approach":[228],"1.46x":[231],"1.23x":[233]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
