{"id":"https://openalex.org/W2765095808","doi":"https://doi.org/10.1145/3123939.3136952","title":"Using intra-core loop-task accelerators to improve the productivity and performance of task-based parallel programs","display_name":"Using intra-core loop-task accelerators to improve the productivity and performance of task-based parallel programs","publication_year":2017,"publication_date":"2017-10-14","ids":{"openalex":"https://openalex.org/W2765095808","doi":"https://doi.org/10.1145/3123939.3136952","mag":"2765095808"},"language":"en","primary_location":{"id":"doi:10.1145/3123939.3136952","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3123939.3136952","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 50th Annual IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107868041","display_name":"Ji Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ji Kim","raw_affiliation_strings":["Cornell University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cornell University","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020519671","display_name":"Shunning Jiang","orcid":"https://orcid.org/0000-0003-3439-5760"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shunning Jiang","raw_affiliation_strings":["Cornell University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cornell University","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026966440","display_name":"Christopher Torng","orcid":"https://orcid.org/0000-0002-2385-619X"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Torng","raw_affiliation_strings":["Cornell University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cornell University","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046661425","display_name":"Moyang Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Moyang Wang","raw_affiliation_strings":["Cornell University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cornell University","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078586017","display_name":"S Srinath","orcid":null},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shreesha Srinath","raw_affiliation_strings":["Cornell University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cornell University","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009468601","display_name":"Berkin Ilbeyi","orcid":null},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Berkin Ilbeyi","raw_affiliation_strings":["Cornell University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cornell University","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013290812","display_name":"Khalid Al-Hawaj","orcid":null},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Khalid Al-Hawaj","raw_affiliation_strings":["Cornell University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cornell University","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091660287","display_name":"Christopher Batten","orcid":"https://orcid.org/0000-0002-2835-667X"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Batten","raw_affiliation_strings":["Cornell University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cornell University","institution_ids":["https://openalex.org/I205783295"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9248,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.76036389,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"759","last_page":"773"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.8538440465927124},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8370312452316284},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7371302843093872},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.7029834389686584},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5571936368942261},{"id":"https://openalex.org/keywords/decoupling","display_name":"Decoupling (probability)","score":0.5269739627838135},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4872857332229614},{"id":"https://openalex.org/keywords/task-parallelism","display_name":"Task parallelism","score":0.4593328535556793},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.4508242607116699},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.4431791305541992},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3270263671875},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.20002391934394836}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.8538440465927124},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8370312452316284},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7371302843093872},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.7029834389686584},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5571936368942261},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.5269739627838135},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4872857332229614},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.4593328535556793},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.4508242607116699},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.4431791305541992},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3270263671875},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.20002391934394836},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3123939.3136952","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3123939.3136952","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 50th Annual IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.8700000047683716}],"awards":[{"id":"https://openalex.org/G1424915592","display_name":null,"funder_award_id":"FA9550-15-1-0194","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G7972656527","display_name":null,"funder_award_id":"1149464, 1337240, 1512937, 1527065","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W1482635212","https://openalex.org/W1536852470","https://openalex.org/W1577137152","https://openalex.org/W1607699646","https://openalex.org/W1895381303","https://openalex.org/W1966668827","https://openalex.org/W1974165977","https://openalex.org/W1975579741","https://openalex.org/W1976922833","https://openalex.org/W1979660638","https://openalex.org/W1995902175","https://openalex.org/W1996320382","https://openalex.org/W1997162567","https://openalex.org/W1998886328","https://openalex.org/W2007944027","https://openalex.org/W2012252449","https://openalex.org/W2012548389","https://openalex.org/W2013156670","https://openalex.org/W2014259743","https://openalex.org/W2019380637","https://openalex.org/W2024122052","https://openalex.org/W2054983537","https://openalex.org/W2067354926","https://openalex.org/W2079751107","https://openalex.org/W2081373884","https://openalex.org/W2084423328","https://openalex.org/W2089672258","https://openalex.org/W2098297530","https://openalex.org/W2107978915","https://openalex.org/W2108801243","https://openalex.org/W2109473404","https://openalex.org/W2112085716","https://openalex.org/W2113563476","https://openalex.org/W2118826546","https://openalex.org/W2125754912","https://openalex.org/W2134427337","https://openalex.org/W2135947393","https://openalex.org/W2143114052","https://openalex.org/W2146381930","https://openalex.org/W2147657366","https://openalex.org/W2148041475","https://openalex.org/W2148443481","https://openalex.org/W2155503253","https://openalex.org/W2155568054","https://openalex.org/W2156831150","https://openalex.org/W2158664566","https://openalex.org/W2159481344","https://openalex.org/W2160559747","https://openalex.org/W2167399819","https://openalex.org/W2170382128","https://openalex.org/W2240718600","https://openalex.org/W2273348289","https://openalex.org/W2464177207","https://openalex.org/W2522451327","https://openalex.org/W2581189719","https://openalex.org/W2588464298","https://openalex.org/W4230609379","https://openalex.org/W4251700126"],"related_works":["https://openalex.org/W1992352827","https://openalex.org/W2994245508","https://openalex.org/W4242172182","https://openalex.org/W2082875307","https://openalex.org/W4237780868","https://openalex.org/W4285302443","https://openalex.org/W4396938741","https://openalex.org/W2019451907","https://openalex.org/W2127594310","https://openalex.org/W2137845512"],"abstract_inverted_index":{"Task-based":[0],"parallel":[1,44,172],"programming":[2],"frameworks":[3],"offer":[4],"compelling":[5],"productivity":[6],"and":[7,47,81,104,125,146,164,169],"performance":[8,163],"benefits":[9],"for":[10,93],"modern":[11],"chip":[12],"multi-processors":[13],"(CMPs).":[14],"At":[15],"the":[16,42,73],"same":[17],"time,":[18],"CMPs":[19,61,157],"also":[20,147],"provide":[21],"packed-SIMD":[22,34],"units":[23,35],"to":[24,76,99,109,130],"exploit":[25],"fine-grain":[26],"data":[27],"parallelism.":[28],"Two":[29],"fundamental":[30],"challenges":[31],"make":[32],"using":[33],"with":[36,62,158,174],"task-parallel":[37],"programs":[38,173],"particularly":[39],"difficult:":[40],"(1)":[41],"intra-core":[43,63],"abstraction":[45],"gap;":[46],"(2)":[48],"inefficient":[49],"execution":[50,80],"of":[51,96,120,141],"irregular":[52,105,170],"tasks.":[53,107],"To":[54],"address":[55],"these":[56],"challenges,":[57],"we":[58],"propose":[59],"augmenting":[60,156],"loop-task":[64,79,171],"accelerators":[65],"(LTAs).":[66],"We":[67],"introduce":[68],"a":[69],"lightweight":[70,159],"hint":[71],"in":[72,116,137],"instruction":[74],"set":[75],"elegantly":[77],"encode":[78],"an":[82,110,117,131,138],"LTA":[83],"microarchitectural":[84],"template":[85],"that":[86],"can":[87,161],"be":[88],"configured":[89],"at":[90],"design":[91],"time":[92],"different":[94],"amounts":[95],"spatial/temporal":[97],"decoupling":[98],"efficiently":[100],"execute":[101],"both":[102,167],"regular":[103,168],"loop":[106],"Compared":[108,129],"in-order":[111],"CMP":[112,133],"baseline,":[113,134],"CMP+LTA":[114,135],"results":[115,136],"average":[118,139],"speedup":[119,140],"4.2X":[121],"(1.8X":[122],"area":[123,144],"normalized)":[124,145],"similar":[126],"energy":[127,149],"efficiency.":[128],"out-of-order":[132],"2.3X":[142],"(1.5X":[143],"improves":[148],"efficiency":[150,165],"by":[151],"3.2X.":[152],"Our":[153],"work":[154],"suggests":[155],"LTAs":[160],"improve":[162],"on":[166],"minimal":[175],"software":[176],"changes.":[177]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
