{"id":"https://openalex.org/W2774112409","doi":"https://doi.org/10.1145/3151035","title":"Improving the Efficiency of GPGPU Work-Queue Through Data Awareness","display_name":"Improving the Efficiency of GPGPU Work-Queue Through Data Awareness","publication_year":2017,"publication_date":"2017-12-05","ids":{"openalex":"https://openalex.org/W2774112409","doi":"https://doi.org/10.1145/3151035","mag":"2774112409"},"language":"en","primary_location":{"id":"doi:10.1145/3151035","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3151035","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044840341","display_name":"Libo Huang","orcid":"https://orcid.org/0000-0001-7878-3998"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Libo Huang","raw_affiliation_strings":["National University of Defense Technology, Changsha, Hunan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086487289","display_name":"Yashuai L\u00fc","orcid":null},"institutions":[{"id":"https://openalex.org/I4210148107","display_name":"Space Engineering University","ror":"https://ror.org/04rj1td02","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210148107"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yashuai L\u00fc","raw_affiliation_strings":["Space Engineering University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Space Engineering University, Beijing, China","institution_ids":["https://openalex.org/I4210148107"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101790620","display_name":"Li Shen","orcid":"https://orcid.org/0000-0001-9043-2998"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Shen","raw_affiliation_strings":["National University of Defense Technology, Changsha, Hunan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047295889","display_name":"Zhiying Wang","orcid":"https://orcid.org/0000-0003-3339-3085"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiying Wang","raw_affiliation_strings":["National University of Defense Technology, Changsha, Hunan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5044840341"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.19205069,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"14","issue":"4","first_page":"1","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9271279573440552},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6525024175643921},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6497899889945984},{"id":"https://openalex.org/keywords/queue","display_name":"Queue","score":0.6483554840087891},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.630243182182312},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4313862919807434},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.41983097791671753},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2514573931694031},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.12424993515014648},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.08966788649559021}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9271279573440552},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6525024175643921},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6497899889945984},{"id":"https://openalex.org/C160403385","wikidata":"https://www.wikidata.org/wiki/Q220543","display_name":"Queue","level":2,"score":0.6483554840087891},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.630243182182312},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4313862919807434},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.41983097791671753},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2514573931694031},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.12424993515014648},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.08966788649559021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3151035","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3151035","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4699999988079071,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[{"id":"https://openalex.org/G4571707199","display_name":null,"funder_award_id":"61433019, 61472435, 61202129, 61572058, 61472431, 61402501, and U14352217","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W174441044","https://openalex.org/W1978155891","https://openalex.org/W1979527452","https://openalex.org/W1981585303","https://openalex.org/W1983235612","https://openalex.org/W1985291160","https://openalex.org/W1991709203","https://openalex.org/W1996511977","https://openalex.org/W1997162567","https://openalex.org/W2009173369","https://openalex.org/W2016706026","https://openalex.org/W2029940394","https://openalex.org/W2030089573","https://openalex.org/W2049875313","https://openalex.org/W2076304675","https://openalex.org/W2090278477","https://openalex.org/W2102592717","https://openalex.org/W2107251158","https://openalex.org/W2109051991","https://openalex.org/W2138624212","https://openalex.org/W2141966484","https://openalex.org/W2143700077","https://openalex.org/W2152812278","https://openalex.org/W2166918318","https://openalex.org/W3138340923","https://openalex.org/W4245933748","https://openalex.org/W4302340922"],"related_works":["https://openalex.org/W2983282793","https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W2370314112","https://openalex.org/W1912958759"],"abstract_inverted_index":{"The":[0,55],"architecture":[1],"and":[2,19,31,92,130,160],"programming":[3],"model":[4],"of":[5,40,61,118,181],"current":[6,76],"GPGPUs":[7,51,77],"are":[8,14,70],"best":[9],"suited":[10],"for":[11,45,81,149],"applications":[12],"that":[13,69,148,170],"dominated":[15],"by":[16,64,156],"structured":[17],"control":[18,30],"data":[20,32],"flows":[21],"across":[22],"large":[23],"regular":[24],"datasets.":[25],"Parallel":[26],"workloads":[27,49,129],"with":[28,141,185],"irregular":[29],"structures":[33],"cannot":[34],"easily":[35],"harness":[36],"the":[37,41,59,116,127,150,172],"processing":[38,66],"power":[39],"GPGPU.":[42],"One":[43],"approach":[44,57,169],"mapping":[46],"these":[47],"irregular-parallel":[48,128],"to":[50,114,162,165],"is":[52,171],"using":[53],"work-queues.":[54],"work-queue":[56,85,104],"improves":[58,154],"utilization":[60],"SIMD":[62],"units":[63],"only":[65],"useful":[67],"works":[68],"dynamically":[71],"generated":[72],"during":[73],"execution.":[74],"As":[75],"lack":[78],"necessary":[79],"supports":[80],"work-queues,":[82],"a":[83,101,133,137,142,166],"software-based":[84],"implementation":[86],"often":[87],"suffers":[88],"from":[89],"memory":[90],"contention":[91],"load":[93],"balancing":[94],"issues.":[95],"In":[96],"this":[97],"article,":[98],"we":[99],"present":[100],"novel":[102],"hardware":[103,167,187],"design":[105],"named":[106],"DaQueue":[107,153,176],",":[108],"which":[109],"incorporates":[110],"three":[111],"data-aware":[112],"features":[113],"improve":[115],"efficiency":[117],"work-queues":[119],"on":[120,126,136,158],"GPGPUs.":[121],"We":[122],"evaluate":[123],"our":[124],"proposal":[125],"carry":[131],"out":[132],"case":[134],"study":[135],"path":[138],"tracing":[139],"pipeline":[140],"cycle-level":[143],"simulator.":[144],"Experimental":[145],"results":[146],"show":[147],"tested":[151],"workloads,":[152],"performance":[155],"1.53\u00d7":[157],"average":[159,180],"up":[161],"1.91\u00d7.":[163],"Compared":[164],"worklist":[168],"state-of-the-art":[173],"prior":[174],"work,":[175],"can":[177],"achieve":[178],"an":[179],"33.92%":[182],"extra":[183],"speedup":[184],"less":[186],"area":[188],"cost.":[189]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
