{"id":"https://openalex.org/W1984506768","doi":"https://doi.org/10.1145/2717311","title":"Accelerating Divergent Applications on SIMD Architectures Using Neural Networks","display_name":"Accelerating Divergent Applications on SIMD Architectures Using Neural Networks","publication_year":2015,"publication_date":"2015-03-09","ids":{"openalex":"https://openalex.org/W1984506768","doi":"https://doi.org/10.1145/2717311","mag":"1984506768"},"language":"en","primary_location":{"id":"doi:10.1145/2717311","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2717311","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2717311","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/2717311","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059237344","display_name":"Beayna Grigorian","orcid":null},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Beayna Grigorian","raw_affiliation_strings":["University of California, Los Angeles, CA","University of California, Los Angeles. CA"],"affiliations":[{"raw_affiliation_string":"University of California, Los Angeles, CA","institution_ids":["https://openalex.org/I161318765"]},{"raw_affiliation_string":"University of California, Los Angeles. CA","institution_ids":["https://openalex.org/I161318765"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083493225","display_name":"Glenn Reinman","orcid":null},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Glenn Reinman","raw_affiliation_strings":["University of California, Los Angeles, CA","University of California, Los Angeles. CA"],"affiliations":[{"raw_affiliation_string":"University of California, Los Angeles, CA","institution_ids":["https://openalex.org/I161318765"]},{"raw_affiliation_string":"University of California, Los Angeles. CA","institution_ids":["https://openalex.org/I161318765"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5059237344"],"corresponding_institution_ids":["https://openalex.org/I161318765"],"apc_list":null,"apc_paid":null,"fwci":3.2975,"has_fulltext":true,"cited_by_count":19,"citation_normalized_percentile":{"value":0.91968912,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"12","issue":"1","first_page":"1","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8784548044204712},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.7113519906997681},{"id":"https://openalex.org/keywords/control-flow","display_name":"Control flow","score":0.6336976289749146},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6012172698974609},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5829559564590454},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.5235438346862793},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5114585161209106},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.44806256890296936},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4430678188800812},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.4337170124053955},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4316054880619049},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.42465639114379883},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4199499487876892},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.25946667790412903},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2287495732307434},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10039812326431274}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8784548044204712},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.7113519906997681},{"id":"https://openalex.org/C160191386","wikidata":"https://www.wikidata.org/wiki/Q868299","display_name":"Control flow","level":2,"score":0.6336976289749146},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6012172698974609},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5829559564590454},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.5235438346862793},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5114585161209106},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.44806256890296936},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4430678188800812},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.4337170124053955},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4316054880619049},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.42465639114379883},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4199499487876892},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25946667790412903},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2287495732307434},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10039812326431274},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2717311","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2717311","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2717311","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/2717311","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2717311","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2717311","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8999999761581421,"display_name":"Affordable and clean energy"}],"awards":[{"id":"https://openalex.org/G1372563335","display_name":"Customizable Domain-Specific Computing","funder_award_id":"0926127","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6811296919","display_name":null,"funder_award_id":"C-FAR","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G6865471441","display_name":null,"funder_award_id":"CCF-0926127","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6866006471","display_name":null,"funder_award_id":"926127","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8009827040","display_name":"GRADUATE RESEARCH FELLOWSHIP PROGRAM","funder_award_id":"0707424","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1984506768.pdf","grobid_xml":"https://content.openalex.org/works/W1984506768.grobid-xml"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W164384110","https://openalex.org/W1498436455","https://openalex.org/W1973538724","https://openalex.org/W1981473264","https://openalex.org/W1988115241","https://openalex.org/W2010966003","https://openalex.org/W2026764611","https://openalex.org/W2037743346","https://openalex.org/W2042456747","https://openalex.org/W2068810256","https://openalex.org/W2076304675","https://openalex.org/W2089162427","https://openalex.org/W2090584832","https://openalex.org/W2090963365","https://openalex.org/W2105544671","https://openalex.org/W2107220315","https://openalex.org/W2114703523","https://openalex.org/W2116267755","https://openalex.org/W2119299853","https://openalex.org/W2120585153","https://openalex.org/W2124776405","https://openalex.org/W2128022558","https://openalex.org/W2128317332","https://openalex.org/W2132587889","https://openalex.org/W2133218851","https://openalex.org/W2135947393","https://openalex.org/W2138565468","https://openalex.org/W2142883190","https://openalex.org/W2143283746","https://openalex.org/W2155503253","https://openalex.org/W2155568054","https://openalex.org/W2156540297","https://openalex.org/W2156831150","https://openalex.org/W2157963512","https://openalex.org/W2167399819","https://openalex.org/W2169150396","https://openalex.org/W2169621283","https://openalex.org/W2169875292","https://openalex.org/W2169880332","https://openalex.org/W2170881177","https://openalex.org/W2187230075","https://openalex.org/W2285257517","https://openalex.org/W2335790294","https://openalex.org/W4239437589","https://openalex.org/W4240237526","https://openalex.org/W4253998042"],"related_works":["https://openalex.org/W2534771569","https://openalex.org/W2037547261","https://openalex.org/W2161873733","https://openalex.org/W2107831078","https://openalex.org/W4248145683","https://openalex.org/W2100579514","https://openalex.org/W1672168401","https://openalex.org/W1979287416","https://openalex.org/W2798404616","https://openalex.org/W3013258932"],"abstract_inverted_index":{"The":[0],"purpose":[1],"of":[2,15,58,182,198,204],"this":[3,151],"research":[4],"is":[5,131],"to":[6,11,79,89],"find":[7],"a":[8,140,180,187],"neural-network-based":[9],"solution":[10],"the":[12,56,67,74,95,100,105,155],"well-known":[13],"problem":[14],"branch":[16,33,80],"divergence":[17],"in":[18,63],"Single":[19],"Instruction":[20],"Multiple":[21],"Data":[22],"(SIMD)":[23],"architectures.":[24,149],"Our":[25],"approach":[26,178],"differs":[27],"from":[28],"existing":[29],"techniques":[30],"that":[31,72],"handle":[32],"(or":[34,70],"control-flow)":[35],"divergence,":[36],"which":[37],"use":[38],"costly":[39],"hardware":[40],"modifications,":[41],"low-utilization":[42],"masking":[43],"techniques,":[44],"or":[45],"static":[46],"prediction":[47],"methods.":[48],"As":[49,122],"we":[50,54,153,192],"examine":[51],"divergent":[52,145,183],"applications,":[53],"characterize":[55],"degree":[57],"data-dependent":[59],"control":[60,112],"flow":[61,113,160],"seen":[62],"each":[64],"and":[65,93,135,166,201],"isolate":[66],"code":[68,128],"regions":[69],"\u201ckernels\u201d)":[71],"cause":[73],"most":[75],"performance":[76,196],"degradation":[77],"due":[78],"divergence.":[81],"We":[82],"then":[83],"train":[84],"neural":[85],"networks":[86],"(NNs)":[87],"offline":[88],"approximate":[90],"these":[91],"kernels":[92,106],"inject":[94],"NN":[96,164,167],"computations":[97],"directly":[98],"into":[99,114],"applications":[101,146,184],"as":[102,139,169,171],"substitutes":[103],"for":[104,120,143,161],"they":[107],"approximate.":[108],"This":[109],"essentially":[110],"translates":[111],"nondivergent":[115],"computation,":[116],"trading":[117],"off":[118],"precision":[119],"performance.":[121],"our":[123,177],"methodology":[124],"manipulates":[125],"application":[126],"source":[127],"directly,":[129],"it":[130],"inherently":[132],"platform":[133],"agnostic":[134],"can":[136],"be":[137],"adopted":[138],"general":[141],"means":[142],"accelerating":[144],"on":[147,179,186,193],"data-parallel":[148],"In":[150],"article,":[152],"present":[154],"Neuralizer,":[156],"an":[157],"automated":[158],"software":[159],"kernel":[162],"identification,":[163],"training,":[165],"integration,":[168],"well":[170],"supplementary":[172],"user-controlled":[173],"optimization":[174],"techniques.":[175],"Evaluating":[176],"variety":[181],"run":[185],"Graphics":[188],"Processing":[189],"Unit":[190],"(GPU),":[191],"average":[194],"achieve":[195],"gains":[197],"13.6":[199],"\u00d7":[200,206],"energy":[202],"savings":[203],"14.8":[205],"with":[207],"96%":[208],"accuracy.":[209]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":8},{"year":2016,"cited_by_count":2}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
