{"id":"https://openalex.org/W1979790590","doi":"https://doi.org/10.1145/2632215","title":"A Portable Optimization Engine for Accelerating Irregular Data-Traversal Applications on SIMD Architectures","display_name":"A Portable Optimization Engine for Accelerating Irregular Data-Traversal Applications on SIMD Architectures","publication_year":2014,"publication_date":"2014-06-01","ids":{"openalex":"https://openalex.org/W1979790590","doi":"https://doi.org/10.1145/2632215","mag":"1979790590"},"language":"en","primary_location":{"id":"doi:10.1145/2632215","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2632215","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2632215","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/2632215","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058025888","display_name":"Bin Ren","orcid":"https://orcid.org/0000-0002-4116-5237"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bin Ren","raw_affiliation_strings":["The Ohio State University, Columbus, OH","The Ohio State University, Columbus (OH)#TAB#"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH","institution_ids":["https://openalex.org/I52357470"]},{"raw_affiliation_string":"The Ohio State University, Columbus (OH)#TAB#","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013212213","display_name":"Todd Mytkowicz","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Todd Mytkowicz","raw_affiliation_strings":["Microsoft Research, Redmond, WA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025342178","display_name":"Gagan Agrawal","orcid":"https://orcid.org/0000-0002-2923-5327"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gagan Agrawal","raw_affiliation_strings":["The Ohio State University, Columbus, OH","The Ohio State University, Columbus (OH)#TAB#"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH","institution_ids":["https://openalex.org/I52357470"]},{"raw_affiliation_string":"The Ohio State University, Columbus (OH)#TAB#","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5058025888"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":0.315,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.57241497,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"11","issue":"2","first_page":"1","last_page":"31"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8927629590034485},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.8346161842346191},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.8339953422546387},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7561160326004028},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.7231807708740234},{"id":"https://openalex.org/keywords/traverse","display_name":"Traverse","score":0.6763230562210083},{"id":"https://openalex.org/keywords/tree-traversal","display_name":"Tree traversal","score":0.6615800261497498},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6364309787750244},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.4734886586666107},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.43816789984703064},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.4294564723968506},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4138139486312866},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.3649490475654602},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.34560075402259827},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16582170128822327}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8927629590034485},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.8346161842346191},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.8339953422546387},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7561160326004028},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.7231807708740234},{"id":"https://openalex.org/C176809094","wikidata":"https://www.wikidata.org/wiki/Q15401496","display_name":"Traverse","level":2,"score":0.6763230562210083},{"id":"https://openalex.org/C140745168","wikidata":"https://www.wikidata.org/wiki/Q1210082","display_name":"Tree traversal","level":2,"score":0.6615800261497498},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6364309787750244},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.4734886586666107},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.43816789984703064},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.4294564723968506},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4138139486312866},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.3649490475654602},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.34560075402259827},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16582170128822327},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2632215","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2632215","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2632215","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/2632215","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2632215","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2632215","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G704000355","display_name":"SHF: Small: Advanced Compiler Techniques for Meeting Fault Tolerance Needs of  HPC Systems","funder_award_id":"1319420","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337387","display_name":"Division of Computing and Communication Foundations","ror":"https://ror.org/01mng8331"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1979790590.pdf","grobid_xml":"https://content.openalex.org/works/W1979790590.grobid-xml"},"referenced_works_count":64,"referenced_works":["https://openalex.org/W73145541","https://openalex.org/W112400010","https://openalex.org/W1485372940","https://openalex.org/W1504291959","https://openalex.org/W1525058640","https://openalex.org/W1552569371","https://openalex.org/W1558120281","https://openalex.org/W1674877186","https://openalex.org/W1964145938","https://openalex.org/W1965122136","https://openalex.org/W1968226734","https://openalex.org/W1977512185","https://openalex.org/W1985239701","https://openalex.org/W1985291160","https://openalex.org/W1992425763","https://openalex.org/W1997162567","https://openalex.org/W2016352575","https://openalex.org/W2021685712","https://openalex.org/W2029891792","https://openalex.org/W2040281526","https://openalex.org/W2051250732","https://openalex.org/W2055497547","https://openalex.org/W2057623365","https://openalex.org/W2058950969","https://openalex.org/W2072607050","https://openalex.org/W2087634916","https://openalex.org/W2095407217","https://openalex.org/W2095677000","https://openalex.org/W2097061393","https://openalex.org/W2097909406","https://openalex.org/W2104651752","https://openalex.org/W2110893883","https://openalex.org/W2112629511","https://openalex.org/W2116730320","https://openalex.org/W2128329055","https://openalex.org/W2132967231","https://openalex.org/W2134826720","https://openalex.org/W2140080674","https://openalex.org/W2140418760","https://openalex.org/W2141170493","https://openalex.org/W2142761843","https://openalex.org/W2143114052","https://openalex.org/W2147193503","https://openalex.org/W2149710423","https://openalex.org/W2151224499","https://openalex.org/W2156440470","https://openalex.org/W2160404300","https://openalex.org/W2160836912","https://openalex.org/W2161061943","https://openalex.org/W2161694911","https://openalex.org/W2161775028","https://openalex.org/W2162726111","https://openalex.org/W2165329839","https://openalex.org/W2166621589","https://openalex.org/W2167865917","https://openalex.org/W2167988313","https://openalex.org/W2168102371","https://openalex.org/W2170726034","https://openalex.org/W2185655604","https://openalex.org/W2911964244","https://openalex.org/W4205806386","https://openalex.org/W4213245422","https://openalex.org/W4248073216","https://openalex.org/W6683923952"],"related_works":["https://openalex.org/W2074226157","https://openalex.org/W1585350690","https://openalex.org/W2378666660","https://openalex.org/W2002601993","https://openalex.org/W2998709103","https://openalex.org/W2075849238","https://openalex.org/W1979790590","https://openalex.org/W2390334059","https://openalex.org/W2162726111","https://openalex.org/W1884620995"],"abstract_inverted_index":{"Fine-grained":[0],"data":[1,29,46,87,104,112,123,136,140,156,194],"parallelism":[2,30],"is":[3,89],"increasingly":[4],"common":[5],"in":[6],"the":[7,164,176,208,250],"form":[8],"of":[9,34,122,148,178,203,210,233,259],"longer":[10],"vectors":[11],"integrated":[12],"with":[13],"mainstream":[14],"processors":[15],"(SSE,":[16],"AVX)":[17],"and":[18,108,151,180,213],"various":[19,79],"GPU":[20],"architectures.":[21,205],"This":[22],"article":[23],"develops":[24],"support":[25],"for":[26,31,58,66,130,170,230,253],"exploiting":[27],"such":[28,68],"a":[32,72,120,146,187,200,223,231,256,272],"class":[33],"nonnumeric,":[35],"nongraphic":[36],"applications,":[37],"which":[38,193,264],"perform":[39],"computations":[40],"while":[41],"traversing":[42],"many":[43,98,134],"independent,":[44],"irregular":[45,103,135],"structures.":[47,137],"We":[48],"address":[49,115],"this":[50,116],"problem":[51],"by":[52,71,173],"developing":[53],"several":[54],"novel":[55],"techniques.":[56],"First,":[57],"code":[59],"generation,":[60],"we":[61,83,118,159,215,244],"develop":[62,119],"an":[63],"intermediate":[64],"language":[65],"specifying":[67],"traversals,":[69],"followed":[70],"runtime":[73],"scheduler":[74],"that":[75,85,100,126,132,238],"maps":[76],"traversals":[77],"to":[78,91,166,197,222],"SIMD":[80,95,204,228,262],"units.":[81],"Second,":[82],"observe":[84],"good":[86],"locality":[88,129,182],"crucial":[90],"sustained":[92],"performance":[93,161,242],"from":[94],"architectures,":[96,263],"whereas":[97],"applications":[99,131,169,254],"operate":[101],"on":[102,183,199,226],"structures":[105],"(e.g.,":[106],"trees":[107],"graphs)":[109],"have":[110],"poor":[111],"locality.":[113],"To":[114,206,236],"challenge,":[117],"set":[121,232,258],"layout":[124,141,195,252],"optimizations":[125],"improve":[127],"spatial":[128,153],"traverse":[133],"Unlike":[138],"prior":[139],"optimizations,":[142,214],"our":[143,189,211,239,247],"approach":[144,212,240],"incorporates":[145],"notion":[147],"both":[149],"interthread":[150],"intrathread":[152,181],"reuse":[154],"into":[155],"layout.":[157],"Finally,":[158],"enable":[160,220],"portability":[162],"(i.e.,":[163],"ability":[165],"automatically":[167],"optimize":[168],"different":[171],"architectures)":[172],"accurately":[174],"modeling":[175],"impact":[177],"inter-":[179],"program":[184],"performance.":[185],"As":[186],"consequence,":[188],"model":[190,248],"can":[191],"predict":[192],"optimization":[196],"use":[198],"wide":[201],"variety":[202],"demonstrate":[207,237],"efficacy":[209],"first":[216],"show":[217,245],"how":[218,246],"they":[219],"up":[221],"12X":[224],"speedup":[225,270],"one":[227],"architecture":[229],"real-world":[234,261],"applications.":[235],"enables":[241],"portability,":[243],"predicts":[249],"optimal":[251],"across":[255],"diverse":[257],"three":[260],"offers":[265],"as":[266,268],"much":[267],"45%":[269],"over":[271],"suboptimal":[273],"solution.":[274]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
