{"id":"https://openalex.org/W2523339648","doi":"https://doi.org/10.1109/lca.2015.2478778","title":"Studying Inter-Warp Divergence Aware Execution on GPUs","display_name":"Studying Inter-Warp Divergence Aware Execution on GPUs","publication_year":2015,"publication_date":"2015-09-14","ids":{"openalex":"https://openalex.org/W2523339648","doi":"https://doi.org/10.1109/lca.2015.2478778","mag":"2523339648"},"language":"en","primary_location":{"id":"doi:10.1109/lca.2015.2478778","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lca.2015.2478778","pdf_url":null,"source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034638538","display_name":"Chulian Zhang","orcid":"https://orcid.org/0000-0003-3472-2872"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chulian Zhang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northeastern University, Boston, MA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northeastern University, Boston, MA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063615699","display_name":"Hamed Tabkhi","orcid":"https://orcid.org/0000-0001-5420-1121"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hamed Tabkhi","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northeastern University, Boston, MA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northeastern University, Boston, MA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026700236","display_name":"Gunar Schirner","orcid":"https://orcid.org/0000-0002-5408-8496"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gunar Schirner","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northeastern University, Boston, MA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northeastern University, Boston, MA","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5034638538"],"corresponding_institution_ids":["https://openalex.org/I12912129"],"apc_list":null,"apc_paid":null,"fwci":0.9689,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.78030098,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"15","issue":"2","first_page":"117","last_page":"120"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8840560913085938},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7158282399177551},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.7115499377250671},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.6305645108222961},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6125491261482239},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5736669898033142},{"id":"https://openalex.org/keywords/warp-drive","display_name":"Warp drive","score":0.5324305891990662},{"id":"https://openalex.org/keywords/execution-time","display_name":"Execution time","score":0.47432276606559753},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.43848344683647156},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.41761448979377747},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.37899473309516907},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3242492079734802},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.12449181079864502},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10522714257240295}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8840560913085938},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7158282399177551},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.7115499377250671},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.6305645108222961},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6125491261482239},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5736669898033142},{"id":"https://openalex.org/C155654285","wikidata":"https://www.wikidata.org/wiki/Q81397","display_name":"Warp drive","level":3,"score":0.5324305891990662},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.47432276606559753},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.43848344683647156},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.41761448979377747},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.37899473309516907},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3242492079734802},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.12449181079864502},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10522714257240295},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C68249636","wikidata":"https://www.wikidata.org/wiki/Q385601","display_name":"Brane","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C33332235","wikidata":"https://www.wikidata.org/wiki/Q18362","display_name":"Theoretical physics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lca.2015.2478778","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lca.2015.2478778","pdf_url":null,"source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[{"id":"https://openalex.org/G4390169631","display_name":null,"funder_award_id":"1319501","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1512452330","https://openalex.org/W1972971542","https://openalex.org/W1979527452","https://openalex.org/W2003286928","https://openalex.org/W2047060659","https://openalex.org/W2080592089","https://openalex.org/W2090584832","https://openalex.org/W2142444503"],"related_works":["https://openalex.org/W1854503031","https://openalex.org/W3062287","https://openalex.org/W2380390332","https://openalex.org/W2742145873","https://openalex.org/W4245975140","https://openalex.org/W1977763331","https://openalex.org/W2062253548","https://openalex.org/W4225414539","https://openalex.org/W2393741509","https://openalex.org/W2167150492"],"abstract_inverted_index":{"This":[0],"letter":[1,17,52],"quantitatively":[2],"studies":[3],"the":[4,16,25,30,48],"benefits":[5],"of":[6,36],"inter-warp":[7,26,67],"divergence":[8,27,68],"aware":[9],"execution":[10,34],"on":[11,47],"GPUs.":[12],"To":[13],"that":[14,94],"end,":[15],"first":[18],"proposes":[19,53],"a":[20,54,102],"novel":[21],"approach":[22],"to":[23,58,65,69,86],"quantify":[24],"by":[28,98],"measuring":[29],"temporal":[31,79,109],"similarity":[32],"in":[33,81,105],"progress":[35],"concurrent":[37],"warps,":[38],"which":[39],"we":[40],"call":[41],"Warp":[42],"Progression":[43],"Similarity":[44],"(WPS).":[45],"Based":[46],"WPS":[49],"metric,":[50],"this":[51],"WPS-aware":[55],"Scheduler":[56],"(WPSaS)":[57],"optimize":[59],"GPU":[60],"throughput.":[61,90],"The":[62],"aim":[63],"is":[64],"manage":[66],"hide":[70],"memory":[71],"access":[72],"latency":[73],"and":[74,78,108],"minimize":[75],"resource":[76,106],"conflicts":[77,107],"under-utilization":[80],"compute":[82],"units":[83],"allowing":[84],"GPUs":[85],"achieve":[87],"their":[88],"peak":[89],"Our":[91],"results":[92],"demonstrate":[93],"WPSaS":[95],"improves":[96],"throughput":[97],"10":[99],"percent":[100],"with":[101],"pronounced":[103],"reduction":[104],"under-utilization.":[110]},"counts_by_year":[{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
