{"id":"https://openalex.org/W4318328068","doi":"https://doi.org/10.1145/3559009.3569663","title":"Combining Run-Time Checks and Compile-Time Analysis to Improve Control Flow Auto-Vectorization","display_name":"Combining Run-Time Checks and Compile-Time Analysis to Improve Control Flow Auto-Vectorization","publication_year":2022,"publication_date":"2022-10-08","ids":{"openalex":"https://openalex.org/W4318328068","doi":"https://doi.org/10.1145/3559009.3569663"},"language":"en","primary_location":{"id":"doi:10.1145/3559009.3569663","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1145/3559009.3569663","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3559009.3569663","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3559009.3569663","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018845091","display_name":"Bangtian Liu","orcid":"https://orcid.org/0000-0002-9943-6941"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Bangtian Liu","raw_affiliation_strings":["University of Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"University of Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070412024","display_name":"Avery Laird","orcid":"https://orcid.org/0000-0001-9063-6018"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Avery Laird","raw_affiliation_strings":["University of Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"University of Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109393996","display_name":"Wai Hung Tsang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113654","display_name":"IBM (Canada)","ror":"https://ror.org/025sxka56","country_code":"CA","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210113654"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Wai Hung Tsang","raw_affiliation_strings":["IBM, Canada"],"affiliations":[{"raw_affiliation_string":"IBM, Canada","institution_ids":["https://openalex.org/I4210113654"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012183381","display_name":"Bardia Mahjour","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113654","display_name":"IBM (Canada)","ror":"https://ror.org/025sxka56","country_code":"CA","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210113654"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Bardia Mahjour","raw_affiliation_strings":["IBM, Canada"],"affiliations":[{"raw_affiliation_string":"IBM, Canada","institution_ids":["https://openalex.org/I4210113654"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003682451","display_name":"Maryam Mehri Dehnavi","orcid":"https://orcid.org/0000-0002-2719-8788"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Maryam Mehri Dehnavi","raw_affiliation_strings":["University of Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"University of Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5018845091"],"corresponding_institution_ids":["https://openalex.org/I185261750"],"apc_list":null,"apc_paid":null,"fwci":0.2321,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.54320318,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"439","last_page":"450"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8706921339035034},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.7642600536346436},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.7585009932518005},{"id":"https://openalex.org/keywords/compile-time","display_name":"Compile time","score":0.7343478202819824},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7277665138244629},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.6549311280250549},{"id":"https://openalex.org/keywords/control-flow","display_name":"Control flow","score":0.6282708644866943},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5303065776824951},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.41856902837753296},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.18405693769454956}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8706921339035034},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.7642600536346436},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.7585009932518005},{"id":"https://openalex.org/C200833197","wikidata":"https://www.wikidata.org/wiki/Q333707","display_name":"Compile time","level":3,"score":0.7343478202819824},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7277665138244629},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.6549311280250549},{"id":"https://openalex.org/C160191386","wikidata":"https://www.wikidata.org/wiki/Q868299","display_name":"Control flow","level":2,"score":0.6282708644866943},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5303065776824951},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.41856902837753296},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.18405693769454956},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3559009.3569663","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1145/3559009.3569663","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3559009.3569663","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3559009.3569663","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1145/3559009.3569663","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3559009.3569663","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4318328068.pdf","grobid_xml":"https://content.openalex.org/works/W4318328068.grobid-xml"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W201406093","https://openalex.org/W1573046885","https://openalex.org/W1966324811","https://openalex.org/W1972663160","https://openalex.org/W1973407183","https://openalex.org/W1982205631","https://openalex.org/W1994316441","https://openalex.org/W2013156670","https://openalex.org/W2020166439","https://openalex.org/W2040281526","https://openalex.org/W2043555680","https://openalex.org/W2047937237","https://openalex.org/W2111394443","https://openalex.org/W2140311411","https://openalex.org/W2143798346","https://openalex.org/W2144344516","https://openalex.org/W2147423491","https://openalex.org/W2333659671","https://openalex.org/W2613264175","https://openalex.org/W2617074614","https://openalex.org/W2798404616","https://openalex.org/W2808317299","https://openalex.org/W2911593804","https://openalex.org/W2949609733","https://openalex.org/W3103937600","https://openalex.org/W3123542955","https://openalex.org/W4220987400","https://openalex.org/W4241619010","https://openalex.org/W4246166885","https://openalex.org/W4248701878"],"related_works":["https://openalex.org/W2566637483","https://openalex.org/W2127324789","https://openalex.org/W3024308452","https://openalex.org/W4244894488","https://openalex.org/W2371266106","https://openalex.org/W4285390450","https://openalex.org/W2979513934","https://openalex.org/W2111180768","https://openalex.org/W2366442643","https://openalex.org/W2090268225"],"abstract_inverted_index":{"SIMD":[0,19,45],"(Single":[1],"Instruction":[2],"Multiple":[3],"Data)":[4],"instructions":[5,20],"apply":[6],"the":[7,48,86,147,159,166,205],"same":[8,49],"operation":[9],"to":[10,17,27,78,108,119,131,142,157,164],"multiple":[11],"elements":[12],"simultaneously.":[13],"Compilers":[14],"transform":[15],"codes":[16],"exploit":[18],"through":[21],"auto-vectorization.":[22],"Control":[23],"flow":[24,99],"can":[25],"lead":[26],"challenges":[28],"for":[29,80,168],"auto-vectorization":[30,100],"tools":[31],"because":[32],"compilers":[33],"conservatively":[34],"assume":[35],"branches":[36],"are":[37,224],"divergent.":[38],"However,":[39],"it":[40],"is":[41,179],"common":[42],"that":[43,74,96],"all":[44],"lanes":[46],"follow":[47],"control-path":[50],"at":[51],"run-time,":[52],"a":[53,70,153,184],"property":[54],"we":[55,62,91],"call":[56],"dynamic":[57,89,106],"uniformity.":[58],"In":[59],"this":[60],"paper,":[61],"present":[63],"VecRC":[64,104,135,178,227],"(an":[65],"auto-vectorizer":[66],"with":[67,111,226],"run-time":[68,76,133,162],"checks),":[69],"novel":[71],"compile-time":[72,94,137],"technique":[73],"uses":[75],"checks":[77,163],"test":[79],"dynamically":[81],"uniform":[82],"control":[83,98],"flows.":[84],"Under":[85],"assumption":[87],"of":[88,149,161,187,211],"uniformity,":[90],"perform":[92],"several":[93],"analyses":[95],"improve":[97],"vs":[101],"state-of-the-art":[102],"approaches.":[103],"leverages":[105],"uniformity":[107,141],"vectorize":[109],"loops":[110],"control-dependent":[112],"loop-carried":[113],"dependences.":[114],"Existing":[115],"strategies":[116],"use":[117],"speculation":[118],"optimistically":[120],"execute":[121],"vector":[122],"code,":[123],"and":[124,194,199,215,222],"must":[125],"correct":[126],"any":[127],"incorrect":[128],"computation":[129],"due":[130],"violated":[132],"assumptions.":[134],"performs":[136],"analysis":[138],"based":[139],"on":[140,183,196,228],"support":[143],"such":[144],"dependences":[145],"without":[146],"overhead":[148],"speculation.":[150],"We":[151],"propose":[152],"probability-based":[154],"cost":[155],"model":[156],"predict":[158],"profitability":[160],"eliminate":[165],"need":[167],"specialized":[169],"profiling":[170],"or":[171],"expensive":[172],"auto-tuning":[173],"required":[174],"in":[175,181],"existing":[176],"methods.":[177],"evaluated":[180],"LLVM":[182],"diverse":[185],"range":[186],"benchmarks":[188],"including":[189],"SPEC2017,":[190],"NPB,":[191],"Parboil,":[192],"TSVC,":[193],"Rodinia":[195],"Intel":[197],"Skylake":[198,206],"IBM":[200],"Power":[201],"9":[202],"architectures.":[203],"On":[204],"architecture,":[207],"geometric":[208],"mean":[209],"speedups":[210],"1.31x,":[212],"1.20x,":[213],"1.19x,":[214],"1.06x":[216],"over":[217],"Region":[218],"Vectorizer,":[219],"GCC,":[220],"Clang,":[221],"ICC":[223],"obtained":[225],"real":[229],"benchmark":[230],"code.":[231]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-31T07:56:22.981413","created_date":"2025-10-10T00:00:00"}
