{"id":"https://openalex.org/W4416794666","doi":"https://doi.org/10.1007/s42514-025-00261-y","title":"Optimizing winograd-based convolution with DCU\u2019s matrix cores","display_name":"Optimizing winograd-based convolution with DCU\u2019s matrix cores","publication_year":2025,"publication_date":"2025-11-28","ids":{"openalex":"https://openalex.org/W4416794666","doi":"https://doi.org/10.1007/s42514-025-00261-y"},"language":"en","primary_location":{"id":"doi:10.1007/s42514-025-00261-y","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s42514-025-00261-y","pdf_url":null,"source":{"id":"https://openalex.org/S4210190911","display_name":"CCF Transactions on High Performance Computing","issn_l":"2524-4922","issn":["2524-4922","2524-4930"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"CCF Transactions on High Performance Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022800604","display_name":"Jiandong Shang","orcid":"https://orcid.org/0009-0001-7673-2641"},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiandong Shang","raw_affiliation_strings":["National Supercomputing Center in Zhengzhou, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China","School of Computer and Artificial Intelligence, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Supercomputing Center in Zhengzhou, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China","institution_ids":["https://openalex.org/I38877650"]},{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048701309","display_name":"Fuchang Gao","orcid":"https://orcid.org/0000-0001-7699-4433"},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fuchang Gao","raw_affiliation_strings":["School of Computer and Artificial Intelligence, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010639580","display_name":"Zhaopeng Li","orcid":"https://orcid.org/0000-0002-1378-337X"},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaopeng Li","raw_affiliation_strings":["School of Computer and Artificial Intelligence, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yizhe Sui","orcid":null},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yizhe Sui","raw_affiliation_strings":["School of Computer and Artificial Intelligence, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101846482","display_name":"Gang Wu","orcid":"https://orcid.org/0000-0002-6615-0699"},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Wu","raw_affiliation_strings":["National Supercomputing Center in Zhengzhou, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China","School of Computer and Artificial Intelligence, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Supercomputing Center in Zhengzhou, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China","institution_ids":["https://openalex.org/I38877650"]},{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100332796","display_name":"Nan Wang","orcid":"https://orcid.org/0000-0003-3662-3675"},"institutions":[{"id":"https://openalex.org/I25254941","display_name":"Beijing Normal University","ror":"https://ror.org/022k4wk35","country_code":"CN","type":"education","lineage":["https://openalex.org/I25254941"]},{"id":"https://openalex.org/I4210123392","display_name":"Beijing Municipal Ecological and Environmental Monitoring Center","ror":"https://ror.org/02ygbbs43","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210123392"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nan Wang","raw_affiliation_strings":["College of Global Change and Earth System Science, Faculty of Geographical Science, Beijing Normal University, No.19, Xinjiekouwai Street, Beijing, 100875, Beijing, China","Henan Ecological Environmental Monitoring and Safety Center, Henan Key Laboratory of Environmental Monitoring Technology, No.10 Xueli Road, Zhengzhou, 450008, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Global Change and Earth System Science, Faculty of Geographical Science, Beijing Normal University, No.19, Xinjiekouwai Street, Beijing, 100875, Beijing, China","institution_ids":["https://openalex.org/I25254941"]},{"raw_affiliation_string":"Henan Ecological Environmental Monitoring and Safety Center, Henan Key Laboratory of Environmental Monitoring Technology, No.10 Xueli Road, Zhengzhou, 450008, Henan, China","institution_ids":["https://openalex.org/I4210123392"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100321135","display_name":"Lingling Wang","orcid":"https://orcid.org/0000-0002-1049-9170"},"institutions":[{"id":"https://openalex.org/I4210123392","display_name":"Beijing Municipal Ecological and Environmental Monitoring Center","ror":"https://ror.org/02ygbbs43","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210123392"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lingling Wang","raw_affiliation_strings":["Henan Ecological Environmental Monitoring and Safety Center, Henan Key Laboratory of Environmental Monitoring Technology, No.10 Xueli Road, Zhengzhou, 450008, Henan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Henan Ecological Environmental Monitoring and Safety Center, Henan Key Laboratory of Environmental Monitoring Technology, No.10 Xueli Road, Zhengzhou, 450008, Henan, China","institution_ids":["https://openalex.org/I4210123392"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068068809","display_name":"Dujuan Zhang","orcid":"https://orcid.org/0009-0004-2733-3064"},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dujuan Zhang","raw_affiliation_strings":["National Supercomputing Center in Zhengzhou, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China","School of Computer and Artificial Intelligence, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China"],"raw_orcid":"https://orcid.org/0009-0004-2733-3064","affiliations":[{"raw_affiliation_string":"National Supercomputing Center in Zhengzhou, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China","institution_ids":["https://openalex.org/I38877650"]},{"raw_affiliation_string":"School of Computer and Artificial Intelligence, Zhengzhou University, No.100 Science Avenue, Zhengzhou, 450001, Henan, China","institution_ids":["https://openalex.org/I38877650"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5022800604"],"corresponding_institution_ids":["https://openalex.org/I38877650"],"apc_list":{"value":2190,"currency":"EUR","value_usd":2790},"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.40010283,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"8","issue":"1","first_page":"107","last_page":"119"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.515500009059906,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.515500009059906,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.12729999423027039,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.09950000047683716,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.5353000164031982},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.5212000012397766},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4846000075340271},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.48350000381469727},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.48260000348091125},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.46619999408721924},{"id":"https://openalex.org/keywords/transformation-matrix","display_name":"Transformation matrix","score":0.4327000081539154},{"id":"https://openalex.org/keywords/interleaving","display_name":"Interleaving","score":0.43220001459121704},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.4198000133037567}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6575999855995178},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5903000235557556},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.5353000164031982},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.5212000012397766},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4846000075340271},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.48350000381469727},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.48260000348091125},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.47690001130104065},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.46619999408721924},{"id":"https://openalex.org/C165443888","wikidata":"https://www.wikidata.org/wiki/Q1482183","display_name":"Transformation matrix","level":3,"score":0.4327000081539154},{"id":"https://openalex.org/C28034677","wikidata":"https://www.wikidata.org/wiki/Q17092530","display_name":"Interleaving","level":2,"score":0.43220001459121704},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.4198000133037567},{"id":"https://openalex.org/C60292330","wikidata":"https://www.wikidata.org/wiki/Q1014065","display_name":"Hadamard transform","level":2,"score":0.3747999966144562},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3287000060081482},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.32409998774528503},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.3221000134944916},{"id":"https://openalex.org/C3265923","wikidata":"https://www.wikidata.org/wiki/Q669129","display_name":"Arithmetic underflow","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.3125},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.30250000953674316},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3003999888896942},{"id":"https://openalex.org/C39096654","wikidata":"https://www.wikidata.org/wiki/Q728507","display_name":"Strassen algorithm","level":4,"score":0.2915000021457672},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.28929999470710754},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.2874999940395355},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.28049999475479126},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.2782999873161316},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.27000001072883606},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.26750001311302185},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.26159998774528503},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.25850000977516174},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.25369998812675476}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s42514-025-00261-y","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s42514-025-00261-y","pdf_url":null,"source":{"id":"https://openalex.org/S4210190911","display_name":"CCF Transactions on High Performance Computing","issn_l":"2524-4922","issn":["2524-4922","2524-4930"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"CCF Transactions on High Performance Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2172654076","https://openalex.org/W2622826443","https://openalex.org/W2978452617","https://openalex.org/W2998957070","https://openalex.org/W3091804697","https://openalex.org/W3095733721","https://openalex.org/W3166036439","https://openalex.org/W3194710734","https://openalex.org/W3204140704","https://openalex.org/W4206841102","https://openalex.org/W4286635279","https://openalex.org/W4381282557","https://openalex.org/W4401361028","https://openalex.org/W4405361299"],"related_works":[],"abstract_inverted_index":null,"counts_by_year":[],"updated_date":"2026-02-11T14:41:00.668223","created_date":"2025-11-28T00:00:00"}
