{"id":"https://openalex.org/W2936491961","doi":"https://doi.org/10.1145/3291050","title":"HAWS","display_name":"HAWS","publication_year":2019,"publication_date":"2019-04-18","ids":{"openalex":"https://openalex.org/W2936491961","doi":"https://doi.org/10.1145/3291050","mag":"2936491961"},"language":"en","primary_location":{"id":"doi:10.1145/3291050","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3291050","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3291050","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3291050","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014736530","display_name":"Xun Gong","orcid":"https://orcid.org/0000-0003-0417-8028"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xun Gong","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057421794","display_name":"Xiang Gong","orcid":"https://orcid.org/0000-0002-0170-2982"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiang Gong","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101516694","display_name":"Leiming Yu","orcid":"https://orcid.org/0000-0003-4917-1030"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Leiming Yu","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061128237","display_name":"David Kaeli","orcid":"https://orcid.org/0000-0002-5692-0151"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Kaeli","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5014736530"],"corresponding_institution_ids":["https://openalex.org/I12912129"],"apc_list":null,"apc_paid":null,"fwci":1.4446,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.79819984,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"16","issue":"2","first_page":"1","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9861000180244446,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9836999773979187,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.900833249092102},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6284421682357788},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5683786273002625},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.5021569728851318},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.48750898241996765},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4205498695373535},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.4189241826534271},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.40436986088752747},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.181659996509552}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.900833249092102},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6284421682357788},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5683786273002625},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.5021569728851318},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.48750898241996765},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4205498695373535},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4189241826534271},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.40436986088752747},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.181659996509552},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3291050","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3291050","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3291050","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3291050","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3291050","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3291050","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.6200000047683716}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2936491961.pdf","grobid_xml":"https://content.openalex.org/works/W2936491961.grobid-xml"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W1667652561","https://openalex.org/W1964826176","https://openalex.org/W1971421905","https://openalex.org/W1984222112","https://openalex.org/W1986535055","https://openalex.org/W1998886328","https://openalex.org/W2047060659","https://openalex.org/W2048441570","https://openalex.org/W2053744175","https://openalex.org/W2059807497","https://openalex.org/W2090584832","https://openalex.org/W2092608522","https://openalex.org/W2096661534","https://openalex.org/W2103742924","https://openalex.org/W2116986122","https://openalex.org/W2124792150","https://openalex.org/W2125979435","https://openalex.org/W2142444503","https://openalex.org/W2155503253","https://openalex.org/W2156831150","https://openalex.org/W2164228393","https://openalex.org/W2238992335","https://openalex.org/W2315868086","https://openalex.org/W2464177207","https://openalex.org/W2470243357","https://openalex.org/W2511318867","https://openalex.org/W2593733978","https://openalex.org/W2604449259","https://openalex.org/W2736244279","https://openalex.org/W3007321493","https://openalex.org/W3013490664","https://openalex.org/W4229618172","https://openalex.org/W4249082578"],"related_works":["https://openalex.org/W2023832055","https://openalex.org/W2082485924","https://openalex.org/W2489934651","https://openalex.org/W108401543","https://openalex.org/W2279642117","https://openalex.org/W4300190729","https://openalex.org/W2953056293","https://openalex.org/W4200143910","https://openalex.org/W2019374455","https://openalex.org/W2033486618"],"abstract_inverted_index":{"Graphics":[0],"Processing":[1],"Units":[2],"(GPUs)":[3],"have":[4],"become":[5,71],"an":[6,159],"attractive":[7],"platform":[8],"for":[9,176],"accelerating":[10],"challenging":[11],"applications":[12],"on":[13,54,147,151,174],"a":[14,32,85,99,113,126],"range":[15,34],"of":[16,35,65,77,125,139,162],"platforms,":[17],"from":[18],"High":[19],"Performance":[20],"Computing":[21],"(HPC)":[22],"to":[23,59,74,91,102,119],"full-featured":[24],"smartphones.":[25],"They":[26],"can":[27,107,117,168],"overcome":[28],"computational":[29],"barriers":[30],"in":[31,62,122],"wide":[33],"data-parallel":[36],"kernels.":[37],"GPUs":[38],"hide":[39,75],"pipeline":[40],"stalls":[41],"and":[42],"memory":[43,56,109,127,177],"latency":[44],"by":[45,97,133,142,172],"utilizing":[46],"efficient":[47],"thread":[48],"preemption.":[49],"But":[50],"given":[51],"the":[52,55,60,63,123,152],"demands":[53],"hierarchy":[57],"due":[58],"growth":[61],"number":[64],"computing":[66],"cores":[67],"on-chip,":[68],"it":[69],"has":[70],"increasingly":[72],"difficult":[73],"all":[76],"these":[78],"stalls.":[79,94,110],"In":[80],"this":[81],"article,":[82],"we":[83],"propose":[84],"novel":[86],"Hint-Assisted":[87],"Wavefront":[88],"Scheduler":[89],"(HAWS)":[90],"bypass":[92,108],"long-latency":[93],"HAWS":[95,111,136,167],"starts":[96],"enhancing":[98],"compiler":[100],"infrastructure":[101],"identify":[103],"potential":[104],"opportunities":[105],"that":[106,116],"includes":[112],"wavefront":[114],"scheduler":[115],"continue":[118],"execute":[120],"instructions":[121,130],"shadow":[124],"stall,":[128],"executing":[129],"speculatively,":[131],"guided":[132],"compiler-generated":[134],"hints.":[135],"increases":[137],"utilization":[138],"GPU":[140,156],"resources":[141],"aggressively":[143],"fetching/executing":[144],"speculatively.":[145],"Based":[146],"our":[148],"simulation":[149],"results":[150],"AMD":[153],"Southern":[154],"Islands":[155],"architecture,":[157],"at":[158],"estimated":[160],"cost":[161],"0.4%":[163],"total":[164],"chip":[165],"area,":[166],"improve":[169],"application":[170],"performance":[171],"14.6%":[173],"average":[175],"intensive":[178],"applications.":[179]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2019-04-25T00:00:00"}
