{"id":"https://openalex.org/W4283029140","doi":"https://doi.org/10.1145/3524059.3532392","title":"Toward accelerated stencil computation by adapting tensor core unit on GPU","display_name":"Toward accelerated stencil computation by adapting tensor core unit on GPU","publication_year":2022,"publication_date":"2022-06-16","ids":{"openalex":"https://openalex.org/W4283029140","doi":"https://doi.org/10.1145/3524059.3532392"},"language":"en","primary_location":{"id":"doi:10.1145/3524059.3532392","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3524059.3532392","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 36th ACM International Conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100324634","display_name":"Xiaoyan Liu","orcid":"https://orcid.org/0000-0002-1109-3869"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaoyan Liu","raw_affiliation_strings":["Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100330538","display_name":"Yi Liu","orcid":"https://orcid.org/0000-0002-4911-1161"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Liu","raw_affiliation_strings":["Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018705589","display_name":"Hailong Yang","orcid":"https://orcid.org/0000-0003-1101-7927"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hailong Yang","raw_affiliation_strings":["Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046015937","display_name":"Jianjin Liao","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianjin Liao","raw_affiliation_strings":["Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046708261","display_name":"Mingzhen Li","orcid":"https://orcid.org/0000-0002-4115-9072"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingzhen Li","raw_affiliation_strings":["Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074183877","display_name":"Zhongzhi Luan","orcid":"https://orcid.org/0000-0002-7186-0556"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongzhi Luan","raw_affiliation_strings":["Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079362609","display_name":"Depei Qian","orcid":"https://orcid.org/0000-0002-5382-1473"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Depei Qian","raw_affiliation_strings":["Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100324634"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":7.4929,"has_fulltext":false,"cited_by_count":35,"citation_normalized_percentile":{"value":0.9798995,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stencil","display_name":"Stencil","score":0.9581553936004639},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8193231225013733},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6958725452423096},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5531288385391235},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.5059236884117126},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.4763919711112976},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.4712802767753601},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.41450440883636475},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.41024070978164673},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.20754751563072205},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.1705639660358429},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.10283783078193665}],"concepts":[{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.9581553936004639},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8193231225013733},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6958725452423096},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5531288385391235},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.5059236884117126},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.4763919711112976},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.4712802767753601},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.41450440883636475},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.41024070978164673},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.20754751563072205},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.1705639660358429},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.10283783078193665},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3524059.3532392","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3524059.3532392","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 36th ACM International Conference on Supercomputing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1378380340","display_name":null,"funder_award_id":"2020YFB1506703","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5774836301","display_name":null,"funder_award_id":"62072018","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W2003264742","https://openalex.org/W2008005532","https://openalex.org/W2016618963","https://openalex.org/W2074833026","https://openalex.org/W2077143534","https://openalex.org/W2104512032","https://openalex.org/W2315715336","https://openalex.org/W2786544209","https://openalex.org/W2889543163","https://openalex.org/W2895305554","https://openalex.org/W2901549770","https://openalex.org/W2903376718","https://openalex.org/W2922522393","https://openalex.org/W2936463352","https://openalex.org/W2968753512","https://openalex.org/W2975638172","https://openalex.org/W2984920043","https://openalex.org/W2996929894","https://openalex.org/W3008258313","https://openalex.org/W3020936970","https://openalex.org/W3089631566","https://openalex.org/W3099814709","https://openalex.org/W3104528661","https://openalex.org/W3126212707","https://openalex.org/W3132357455","https://openalex.org/W3175464720","https://openalex.org/W3196320218","https://openalex.org/W3205436637","https://openalex.org/W3205799311","https://openalex.org/W3208099998","https://openalex.org/W4200091031","https://openalex.org/W4232301837","https://openalex.org/W4244063180","https://openalex.org/W4251637954","https://openalex.org/W4285503902"],"related_works":["https://openalex.org/W1999524790","https://openalex.org/W3106055984","https://openalex.org/W2177020196","https://openalex.org/W2996929894","https://openalex.org/W4223480868","https://openalex.org/W2111914791","https://openalex.org/W2244508497","https://openalex.org/W2363677236","https://openalex.org/W4387963725","https://openalex.org/W2246512960"],"abstract_inverted_index":{"The":[0,168],"Tensor":[1],"Core":[2],"Unit":[3],"(TCU)":[4],"has":[5],"been":[6],"increasingly":[7],"adopted":[8],"on":[9,149,162],"modern":[10],"high":[11,70],"performance":[12,18,71,177],"processors,":[13],"specialized":[14],"in":[15,39,66,127],"boosting":[16],"the":[17,67,74,116,131,180],"of":[19,69,76,122,134],"general":[20],"matrix":[21],"multiplication":[22],"(GEMM).":[23],"Due":[24],"to":[25,54,88,106,129,179],"its":[26,93],"highly":[27],"optimized":[28],"hardware":[29],"design,":[30],"TCU":[31,53,89,108,145],"can":[32,174],"significantly":[33],"accelerate":[34,55],"GEMM-based":[35],"operations":[36,57,126],"widely":[37],"used":[38],"scientific":[40],"as":[41,43,59,119],"well":[42],"deep":[44],"learning":[45],"applications.":[46],"However,":[47],"there":[48,79],"is":[49,63,80],"few":[50],"work":[51,83],"exploiting":[52,144],"non-GEMM":[56],"such":[58],"stencil":[60,86,111,117,182],"computation":[61,87,118],"that":[62,84],"also":[64],"important":[65],"field":[68],"computing.":[72],"To":[73],"best":[75],"our":[77,153,172],"knowledge,":[78],"no":[81],"previous":[82],"adapts":[85],"efficiently":[90],"by":[91],"considering":[92],"unique":[94],"characteristics.":[95],"In":[96,136],"this":[97],"paper,":[98],"we":[99,114,138],"propose":[100,139],"a":[101,120],"new":[102],"method":[103,154,173],"called":[104],"TCstencil":[105],"adapt":[107],"for":[109,142],"accelerating":[110],"computation.":[112],"Specifically,":[113],"re-design":[115],"series":[121],"reduction":[123],"and":[124,146,158,165],"summation":[125],"order":[128],"leverage":[130],"computing":[132],"power":[133],"TCU.":[135],"addition,":[137],"corresponding":[140],"optimizations":[141],"better":[143],"memory":[147],"hierarchy":[148],"GPU.":[150],"We":[151],"evaluate":[152],"with":[155],"different":[156],"stencils":[157],"input":[159],"mesh":[160],"sizes":[161],"NVIDIA":[163],"A100":[164],"V100":[166],"GPUs.":[167],"experiment":[169],"results":[170],"demonstrate":[171],"achieve":[175],"superior":[176],"compared":[178],"state-of-the-art":[181],"optimization":[183],"frameworks.":[184]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
