{"id":"https://openalex.org/W3175298944","doi":"https://doi.org/10.1109/tpds.2021.3093231","title":"Repurposing GPU Microarchitectures with Light-Weight Out-Of-Order Execution","display_name":"Repurposing GPU Microarchitectures with Light-Weight Out-Of-Order Execution","publication_year":2021,"publication_date":"2021-06-29","ids":{"openalex":"https://openalex.org/W3175298944","doi":"https://doi.org/10.1109/tpds.2021.3093231","mag":"3175298944"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2021.3093231","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2021.3093231","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006425693","display_name":"Konstantinos Iliakis","orcid":"https://orcid.org/0000-0002-1403-6851"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Konstantinos Iliakis","raw_affiliation_strings":["National Technical University of Athens, Athens, Greece"],"raw_orcid":"https://orcid.org/0000-0002-1403-6851","affiliations":[{"raw_affiliation_string":"National Technical University of Athens, Athens, Greece","institution_ids":["https://openalex.org/I174458059"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076432415","display_name":"Sotirios Xydis","orcid":"https://orcid.org/0000-0003-3151-2730"},"institutions":[{"id":"https://openalex.org/I32762134","display_name":"Harokopio University of Athens","ror":"https://ror.org/02k5gp281","country_code":"GR","type":"education","lineage":["https://openalex.org/I32762134"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Sotirios Xydis","raw_affiliation_strings":["Harokopio University of Athens, Athens, Greece"],"raw_orcid":"https://orcid.org/0000-0003-3151-2730","affiliations":[{"raw_affiliation_string":"Harokopio University of Athens, Athens, Greece","institution_ids":["https://openalex.org/I32762134"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043131021","display_name":"Dimitrios Soudris","orcid":"https://orcid.org/0000-0002-6930-6847"},"institutions":[{"id":"https://openalex.org/I174458059","display_name":"National Technical University of Athens","ror":"https://ror.org/03cx6bg69","country_code":"GR","type":"education","lineage":["https://openalex.org/I174458059"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Dimitrios Soudris","raw_affiliation_strings":["National Technical University of Athens, Athens, Greece"],"raw_orcid":"https://orcid.org/0000-0002-6930-6847","affiliations":[{"raw_affiliation_string":"National Technical University of Athens, Athens, Greece","institution_ids":["https://openalex.org/I174458059"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5006425693"],"corresponding_institution_ids":["https://openalex.org/I174458059"],"apc_list":null,"apc_paid":null,"fwci":1.1788,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.76820481,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"33","issue":"2","first_page":"388","last_page":"402"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9188687205314636},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6911393404006958},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6228308081626892},{"id":"https://openalex.org/keywords/microarchitecture","display_name":"Microarchitecture","score":0.5732225775718689},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.5551502704620361},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.48398256301879883},{"id":"https://openalex.org/keywords/task-parallelism","display_name":"Task parallelism","score":0.47878730297088623},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4640035033226013},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4504736363887787},{"id":"https://openalex.org/keywords/execution-model","display_name":"Execution model","score":0.44405075907707214},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4315018355846405},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.41524431109428406},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.27343398332595825},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1720038652420044},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.13270696997642517}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9188687205314636},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6911393404006958},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6228308081626892},{"id":"https://openalex.org/C107598950","wikidata":"https://www.wikidata.org/wiki/Q259864","display_name":"Microarchitecture","level":2,"score":0.5732225775718689},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.5551502704620361},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.48398256301879883},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.47878730297088623},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4640035033226013},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4504736363887787},{"id":"https://openalex.org/C2776834041","wikidata":"https://www.wikidata.org/wiki/Q25346349","display_name":"Execution model","level":2,"score":0.44405075907707214},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4315018355846405},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.41524431109428406},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.27343398332595825},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1720038652420044},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.13270696997642517}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2021.3093231","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2021.3093231","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4300000071525574,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W1482635212","https://openalex.org/W1510808677","https://openalex.org/W1667652561","https://openalex.org/W1762731526","https://openalex.org/W1967343210","https://openalex.org/W1968775993","https://openalex.org/W1970815868","https://openalex.org/W1979527452","https://openalex.org/W1997162567","https://openalex.org/W2045275492","https://openalex.org/W2047060659","https://openalex.org/W2048441570","https://openalex.org/W2053744175","https://openalex.org/W2059301531","https://openalex.org/W2078994750","https://openalex.org/W2079038734","https://openalex.org/W2080592089","https://openalex.org/W2081373884","https://openalex.org/W2090584832","https://openalex.org/W2093043622","https://openalex.org/W2096661534","https://openalex.org/W2100799944","https://openalex.org/W2106579652","https://openalex.org/W2110195531","https://openalex.org/W2122563027","https://openalex.org/W2123608497","https://openalex.org/W2135947393","https://openalex.org/W2142444503","https://openalex.org/W2155568054","https://openalex.org/W2170382128","https://openalex.org/W2170585292","https://openalex.org/W2273440736","https://openalex.org/W2303605678","https://openalex.org/W2315868086","https://openalex.org/W2398524299","https://openalex.org/W2517837804","https://openalex.org/W2593733978","https://openalex.org/W2612387305","https://openalex.org/W2625200202","https://openalex.org/W2725179571","https://openalex.org/W2763549657","https://openalex.org/W2773758307","https://openalex.org/W2796649226","https://openalex.org/W2803405094","https://openalex.org/W2884590322","https://openalex.org/W2934426651","https://openalex.org/W2936491961","https://openalex.org/W2998200364","https://openalex.org/W3137551601","https://openalex.org/W3146061826","https://openalex.org/W4211127289","https://openalex.org/W4230022086","https://openalex.org/W4234833047","https://openalex.org/W4245911027","https://openalex.org/W4255847023","https://openalex.org/W4302296459","https://openalex.org/W6637151318","https://openalex.org/W6694513646","https://openalex.org/W6746328833","https://openalex.org/W6750448596"],"related_works":["https://openalex.org/W2091793939","https://openalex.org/W2055881261","https://openalex.org/W4250432526","https://openalex.org/W2101536355","https://openalex.org/W2171175484","https://openalex.org/W2085872434","https://openalex.org/W2883183116","https://openalex.org/W2026084820","https://openalex.org/W2562747857","https://openalex.org/W4308095153"],"abstract_inverted_index":{"GPU":[0,16,49,99],"is":[1,89],"the":[2,47,70,84,95,121,128,139,150,163,192],"dominant":[3,85],"platform":[4],"for":[5,174],"accelerating":[6],"general-purpose":[7,212],"workloads":[8],"due":[9],"to":[10,37,60,68,92,119,136,148],"its":[11],"computing":[12],"capacity":[13],"and":[14,33,75,143,153,180,185],"cost-efficiency.":[15],"applications":[17],"cover":[18],"an":[19,178],"ever-growing":[20],"range":[21],"of":[22,56,65,72,97,183,194,210],"domains.":[23],"To":[24,101,126],"achieve":[25],"high":[26],"throughput,":[27],"GPUs":[28],"rely":[29],"on":[30,172],"massive":[31],"multi-threading":[32],"fast":[34],"context":[35],"switching":[36],"overlap":[38],"computations":[39],"with":[40,111,177],"memory":[41,73],"operations.":[42],"We":[43,81,160],"observe":[44],"that":[45,58,83,162],"among":[46],"diverse":[48],"workloads,":[50],"there":[51],"exists":[52],"a":[53,62,108,198,207],"significant":[54],"class":[55],"kernels":[57],"fail":[59],"maintain":[61],"sufficient":[63],"number":[64],"active":[66],"warps":[67],"hide":[69],"latency":[71],"operations,":[74],"thus":[76],"suffer":[77],"from":[78],"frequent":[79],"stalling.":[80],"argue":[82],"Thread-Level":[86,123],"Parallelism":[87,118,124],"model":[88],"not":[90],"enough":[91],"efficiently":[93],"accommodate":[94],"variability":[96],"modern":[98],"applications.":[100],"address":[102],"this":[103],"inherent":[104],"inefficiency,":[105],"we":[106,131,190],"propose":[107],"novel":[109],"micro-architecture":[110,199],"lightweight":[112],"Out-Of-Order":[113],"execution":[114],"capability":[115],"enabling":[116],"Instruction-Level":[117],"complement":[120],"conventional":[122],"model.":[125],"minimize":[127],"hardware":[129],"overhead,":[130,155],"carefully":[132],"design":[133,146],"our":[134,195],"extension":[135],"highly":[137],"re-use":[138],"existing":[140],"micro-architectural":[141],"structures":[142],"study":[144],"various":[145],"trade-offs":[147],"contain":[149],"overall":[151],"area":[152,179],"power":[154,181],"while":[156],"providing":[157,202],"improved":[158],"performance.":[159],"show":[161],"proposed":[164],"architecture":[165],"outperforms":[166],"traditional":[167],"platforms":[168],"by":[169,201],"23":[170],"percent":[171,204],"average":[173],"low-occupancy":[175],"kernels,":[176],"overhead":[182],"1.29":[184],"10.05":[186],"percent,":[187],"respectively.":[188],"Finally,":[189],"establish":[191],"potential":[193],"proposal":[196],"as":[197],"alternative":[200],"16":[203],"speedup":[205],"over":[206],"wide":[208],"collection":[209],"60":[211],"kernels.":[213]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
