{"id":"https://openalex.org/W4249544322","doi":"https://doi.org/10.1109/pact.2013.6618831","title":"Task sampling: computer architecture simulation in the many-core era","display_name":"Task sampling: computer architecture simulation in the many-core era","publication_year":2013,"publication_date":"2013-10-01","ids":{"openalex":"https://openalex.org/W4249544322","doi":"https://doi.org/10.1109/pact.2013.6618831"},"language":"en","primary_location":{"id":"doi:10.1109/pact.2013.6618831","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact.2013.6618831","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029838000","display_name":"Majedul Haque Sujon","orcid":null},"institutions":[{"id":"https://openalex.org/I45438204","display_name":"The University of Texas at San Antonio","ror":"https://ror.org/01kd65564","country_code":"US","type":"education","lineage":["https://openalex.org/I45438204"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Majedul Haque Sujon","raw_affiliation_strings":["Department of Computer Science, University of Texas, San Antonio, San Antonio, TX, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Texas, San Antonio, San Antonio, TX, USA","institution_ids":["https://openalex.org/I45438204"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111441584","display_name":"R. Clint Whaley","orcid":null},"institutions":[{"id":"https://openalex.org/I121820613","display_name":"Louisiana State University","ror":"https://ror.org/05ect4e57","country_code":"US","type":"education","lineage":["https://openalex.org/I121820613"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"R. Clint Whaley","raw_affiliation_strings":["School of EE & CS, CCT, Louisiana State University, Baton Rouge, LA, USA"],"affiliations":[{"raw_affiliation_string":"School of EE & CS, CCT, Louisiana State University, Baton Rouge, LA, USA","institution_ids":["https://openalex.org/I121820613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101413850","display_name":"Qing Yi","orcid":null},"institutions":[{"id":"https://openalex.org/I888729015","display_name":"University of Colorado Colorado Springs","ror":"https://ror.org/054spjc55","country_code":"US","type":"education","lineage":["https://openalex.org/I888729015"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qing Yi","raw_affiliation_strings":["Department of Computer Science, University of Colorado Colorado Springs, Colorado Springs, CO, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Colorado Colorado Springs, Colorado Springs, CO, USA","institution_ids":["https://openalex.org/I888729015"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5029838000"],"corresponding_institution_ids":["https://openalex.org/I45438204"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.36086064,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"405","last_page":"406"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.875968337059021},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.8421776294708252},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.8282995820045471},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8007825613021851},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.793755292892456},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7292674779891968},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.5476101040840149},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4789322316646576},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4586733281612396},{"id":"https://openalex.org/keywords/optimizing-compiler","display_name":"Optimizing compiler","score":0.41942429542541504},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.36353060603141785},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.21992003917694092}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.875968337059021},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.8421776294708252},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.8282995820045471},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8007825613021851},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.793755292892456},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7292674779891968},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.5476101040840149},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4789322316646576},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4586733281612396},{"id":"https://openalex.org/C190902152","wikidata":"https://www.wikidata.org/wiki/Q1325106","display_name":"Optimizing compiler","level":3,"score":0.41942429542541504},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.36353060603141785},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.21992003917694092},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/pact.2013.6618831","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact.2013.6618831","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W30740153","https://openalex.org/W1495550651","https://openalex.org/W1596351413","https://openalex.org/W1851570257","https://openalex.org/W1964031104","https://openalex.org/W1966324811","https://openalex.org/W1984972320","https://openalex.org/W2049890071","https://openalex.org/W2099404643","https://openalex.org/W2111394443","https://openalex.org/W2118031182","https://openalex.org/W2129962996","https://openalex.org/W2147423491","https://openalex.org/W2996943613","https://openalex.org/W3015929788","https://openalex.org/W3149591378","https://openalex.org/W4232434222","https://openalex.org/W4236137312","https://openalex.org/W4243104579","https://openalex.org/W4244894488","https://openalex.org/W4248145683","https://openalex.org/W4251173184","https://openalex.org/W4251500183","https://openalex.org/W6651631105"],"related_works":["https://openalex.org/W2566637483","https://openalex.org/W2127324789","https://openalex.org/W3024308452","https://openalex.org/W2099629705","https://openalex.org/W4244894488","https://openalex.org/W4285390450","https://openalex.org/W2366442643","https://openalex.org/W2090268225","https://openalex.org/W2021715972","https://openalex.org/W75461624"],"abstract_inverted_index":{"Modern":[0],"architectures":[1],"increasingly":[2],"rely":[3],"on":[4],"SIMD":[5],"vectorization":[6,23],"to":[7,52,60,93,102,113],"improve":[8],"performance":[9],"for":[10,21,98,116,121],"floating":[11],"point":[12],"intensive":[13],"scientific":[14],"applications.":[15],"However,":[16],"existing":[17],"compiler":[18,87,109],"optimization":[19],"techniques":[20],"automatic":[22],"are":[24,58],"inhibited":[25],"by":[26,134],"the":[27,69,75,95],"presence":[28],"of":[29],"unknown":[30],"control":[31],"flow":[32],"surrounding":[33],"partially":[34],"vectorizable":[35,133],"computations.":[36],"In":[37],"this":[38],"paper,":[39],"we":[40],"present":[41],"a":[42],"new":[43],"approach,":[44],"speculative":[45],"vectorization,":[46],"which":[47],"speculates":[48],"past":[49],"dependent":[50],"branches":[51],"aggressively":[53],"vectorize":[54],"computational":[55],"paths":[56,97],"that":[57],"expected":[59],"be":[61],"taken":[62],"frequently":[63],"at":[64],"runtime,":[65],"while":[66,127],"simply":[67],"restarting":[68],"calculation":[70],"using":[71,125],"scalar":[72],"instructions":[73],"when":[74],"speculation":[76],"fails.":[77],"We":[78],"have":[79,89],"integrated":[80],"our":[81,107],"technique":[82],"in":[83],"an":[84],"iterative":[85],"optimizing":[86,108],"and":[88,119],"employed":[90],"empirical":[91],"tuning":[92],"select":[94],"profitable":[96],"speculation.":[99],"When":[100],"applied":[101],"optimize":[103],"9":[104],"floating-point":[105],"benchmarks,":[106],"has":[110],"achieved":[111],"up":[112],"6.8X":[114],"speedup":[115],"single":[117],"precision":[118,123],"3.4X":[120],"double":[122],"kernels":[124],"AVX,":[126],"vectorizing":[128],"some":[129],"operations":[130],"considered":[131],"not":[132],"prior":[135],"techniques.":[136]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2}],"updated_date":"2026-02-25T08:12:03.925757","created_date":"2025-10-10T00:00:00"}
