{"id":"https://openalex.org/W4367277178","doi":"https://doi.org/10.1109/tc.2023.3271060","title":"DHTS: A Dynamic Hybrid Tiling Strategy for Optimizing Stencil Computation on GPUs","display_name":"DHTS: A Dynamic Hybrid Tiling Strategy for Optimizing Stencil Computation on GPUs","publication_year":2023,"publication_date":"2023-04-27","ids":{"openalex":"https://openalex.org/W4367277178","doi":"https://doi.org/10.1109/tc.2023.3271060"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2023.3271060","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/tc.2023.3271060","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100339577","display_name":"Song Liu","orcid":"https://orcid.org/0000-0001-7120-894X"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Song Liu","raw_affiliation_strings":["School of Computer Science and Technology, Xi&#x0027;an Jiaotong University, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi&#x0027;an Jiaotong University, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060787498","display_name":"Zengyuan Zhang","orcid":"https://orcid.org/0000-0002-1643-4191"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zengyuan Zhang","raw_affiliation_strings":["School of Computer Science and Technology, Xi&#x0027;an Jiaotong University, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi&#x0027;an Jiaotong University, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103122958","display_name":"Weiguo Wu","orcid":"https://orcid.org/0000-0001-8699-9115"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiguo Wu","raw_affiliation_strings":["School of Computer Science and Technology, Xi&#x0027;an Jiaotong University, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi&#x0027;an Jiaotong University, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100339577"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":0.3479,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.63568324,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"72","issue":"10","first_page":"2795","last_page":"2807"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12162","display_name":"Cellular Automata and Applications","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.905342161655426},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8520373106002808},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8203431367874146},{"id":"https://openalex.org/keywords/loop-tiling","display_name":"Loop tiling","score":0.5568312406539917},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.544292151927948},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.5429809093475342},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.5242607593536377},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.36022329330444336},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.33256056904792786},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2170041799545288},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.14444288611412048}],"concepts":[{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.905342161655426},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8520373106002808},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8203431367874146},{"id":"https://openalex.org/C11799548","wikidata":"https://www.wikidata.org/wiki/Q6675847","display_name":"Loop tiling","level":3,"score":0.5568312406539917},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.544292151927948},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.5429809093475342},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.5242607593536377},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.36022329330444336},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.33256056904792786},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2170041799545288},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.14444288611412048},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tc.2023.3271060","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/tc.2023.3271060","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.4699999988079071,"id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G795464212","display_name":null,"funder_award_id":"62002279","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1503700136","https://openalex.org/W1965081941","https://openalex.org/W1968013322","https://openalex.org/W1973532523","https://openalex.org/W2035430533","https://openalex.org/W2063656563","https://openalex.org/W2077143534","https://openalex.org/W2092750045","https://openalex.org/W2095875205","https://openalex.org/W2102976251","https://openalex.org/W2166622045","https://openalex.org/W2528222261","https://openalex.org/W2608177516","https://openalex.org/W2760907672","https://openalex.org/W2769005600","https://openalex.org/W2889543163","https://openalex.org/W2923764608","https://openalex.org/W2936463352","https://openalex.org/W2966071912","https://openalex.org/W2980104160","https://openalex.org/W2984920043","https://openalex.org/W2995257150","https://openalex.org/W2996929894","https://openalex.org/W3022572479","https://openalex.org/W3099814709","https://openalex.org/W3149084271","https://openalex.org/W3164722309","https://openalex.org/W3170375455","https://openalex.org/W3171525923","https://openalex.org/W3172873298","https://openalex.org/W3196320218","https://openalex.org/W3203437055","https://openalex.org/W4224277035","https://openalex.org/W4226236679","https://openalex.org/W4244512595","https://openalex.org/W4244580719","https://openalex.org/W4255560897","https://openalex.org/W6643480586","https://openalex.org/W6746694394"],"related_works":["https://openalex.org/W1509211761","https://openalex.org/W3091752332","https://openalex.org/W2055312318","https://openalex.org/W2160551264","https://openalex.org/W2083051666","https://openalex.org/W2582456645","https://openalex.org/W2372170743","https://openalex.org/W1517816648","https://openalex.org/W2170268965","https://openalex.org/W2355696437"],"abstract_inverted_index":{"Stencil":[0],"computation":[1],"is":[2],"an":[3],"important":[4],"class":[5],"of":[6,54,135,176,189],"computational":[7],"modes":[8],"in":[9],"scientific":[10],"computing":[11],"applications.":[12],"Loop":[13],"tiling":[14,35,88,96,111,182],"techniques":[15],"have":[16],"been":[17],"widely":[18],"studied":[19],"to":[20,42,68,101,117,180],"accelerate":[21],"stencil":[22,91],"computations":[23],"on":[24,76,97,113,161],"different":[25,155],"architectures":[26],"by":[27],"exploiting":[28],"parallelism":[29],"and":[30,63,65,72,121,137,142,164,184],"data":[31],"locality.":[32],"Recent":[33],"advanced":[34],"methods":[36,49],"enable":[37],"the":[38,44,98,114,151,173,186],"tile-wise":[39],"concurrent":[40,103],"start-up":[41,104],"improve":[43,118],"execution":[45,174],"performance.":[46],"However,":[47],"such":[48],"statically":[50],"partition":[51],"all":[52],"dimensions":[53,100,116],"iteration":[55],"space":[56],"into":[57],"tiles":[58],"with":[59,154],"predetermined":[60],"complex":[61,156],"shapes":[62],"sizes,":[64],"thus":[66],"lead":[67],"low":[69],"thread":[70,119],"utilization":[71,120],"memory":[73,122,145],"access":[74,123],"efficiency":[75],"GPUs.":[77],"In":[78],"this":[79],"paper,":[80],"we":[81],"present":[82],"DHTS,":[83],"a":[84,108],"novel":[85],"dynamic":[86,109],"hybrid":[87],"strategy":[89,153],"for":[90],"computations.":[92],"DHTS":[93,130,170],"employs":[94],"static":[95],"outer":[99],"achieve":[102],"parallelism,":[105],"while":[106],"proposes":[107],"rectangular":[110],"method":[112],"inner":[115],"efficiency.":[124],"By":[125],"deriving":[126],"tile":[127,157],"size":[128],"constraints,":[129],"adaptively":[131],"achieves":[132,185],"equal-size":[133],"workload":[134],"tiles,":[136],"therefore":[138],"reducing":[139],"idle":[140],"threads":[141],"increasing":[143],"coalesced":[144],"accesses":[146],"within":[147],"tiles.":[148],"We":[149],"implement":[150],"proposed":[152],"shapes.":[158],"Experimental":[159],"results":[160],"Titan":[162],"V":[163],"Tesla":[165],"V100":[166],"GPUs":[167],"show":[168],"that":[169],"effectively":[171],"improves":[172],"performance":[175],"2D/3D":[177],"stencils":[178],"compared":[179],"state-of-the-art":[181],"methods,":[183],"best":[187],"improvement":[188],"28\u00d7.":[190]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
