{"id":"https://openalex.org/W4410159138","doi":"https://doi.org/10.1631/fitee.2400453","title":"Minimizing transformer inference overhead using controlling element on Shenwei AI accelerator","display_name":"Minimizing transformer inference overhead using controlling element on Shenwei AI accelerator","publication_year":2025,"publication_date":"2025-04-01","ids":{"openalex":"https://openalex.org/W4410159138","doi":"https://doi.org/10.1631/fitee.2400453"},"language":"en","primary_location":{"id":"doi:10.1631/fitee.2400453","is_oa":false,"landing_page_url":"https://doi.org/10.1631/fitee.2400453","pdf_url":null,"source":{"id":"https://openalex.org/S4210189857","display_name":"Frontiers of Information Technology & Electronic Engineering","issn_l":"2095-9184","issn":["2095-9184","2095-9230"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers of Information Technology &amp; Electronic Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yulong Zhao","orcid":"https://orcid.org/0009-0003-2291-9499"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yulong Zhao","raw_affiliation_strings":["State Key Laboratory of Mathematical Engineering and Advanced Computing, Wuxi, China"],"raw_orcid":"https://orcid.org/0009-0003-2291-9499","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Mathematical Engineering and Advanced Computing, Wuxi, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026844927","display_name":"Chunxing Wu","orcid":"https://orcid.org/0000-0002-2702-5000"},"institutions":[{"id":"https://openalex.org/I4210148107","display_name":"Space Engineering University","ror":"https://ror.org/04rj1td02","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210148107"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunzhi Wu","raw_affiliation_strings":["School of Non-Commissioned Officer, Space Engineering University, Beijing, China","State Key Laboratory of Mathematical Engineering and Advanced Computing, Wuxi, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Non-Commissioned Officer, Space Engineering University, Beijing, China","institution_ids":["https://openalex.org/I4210148107"]},{"raw_affiliation_string":"State Key Laboratory of Mathematical Engineering and Advanced Computing, Wuxi, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100672336","display_name":"Yizhuo Wang","orcid":"https://orcid.org/0009-0008-8327-5937"},"institutions":[{"id":"https://openalex.org/I4210158984","display_name":"National Supercomputing Center in Wuxi","ror":"https://ror.org/04ypjrs34","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210158984"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yizhuo Wang","raw_affiliation_strings":["National Supercomputing Center in Wuxi, Wuxi, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Supercomputing Center in Wuxi, Wuxi, China","institution_ids":["https://openalex.org/I4210158984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016405348","display_name":"Lufei Zhang","orcid":"https://orcid.org/0000-0002-6270-0737"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lufei Zhang","raw_affiliation_strings":["State Key Laboratory of Mathematical Engineering and Advanced Computing, Wuxi, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Mathematical Engineering and Advanced Computing, Wuxi, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005042472","display_name":"Yaguang Zhang","orcid":"https://orcid.org/0000-0002-5445-0555"},"institutions":[{"id":"https://openalex.org/I4210158984","display_name":"National Supercomputing Center in Wuxi","ror":"https://ror.org/04ypjrs34","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210158984"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaguang Zhang","raw_affiliation_strings":["National Supercomputing Center in Wuxi, Wuxi, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Supercomputing Center in Wuxi, Wuxi, China","institution_ids":["https://openalex.org/I4210158984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062728630","display_name":"Wenyuan Shen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210158984","display_name":"National Supercomputing Center in Wuxi","ror":"https://ror.org/04ypjrs34","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210158984"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenyuan Shen","raw_affiliation_strings":["National Supercomputing Center in Wuxi, Wuxi, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Supercomputing Center in Wuxi, Wuxi, China","institution_ids":["https://openalex.org/I4210158984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101535052","display_name":"Hao Fan","orcid":"https://orcid.org/0000-0002-8537-8218"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao Fan","raw_affiliation_strings":["State Key Laboratory of Mathematical Engineering and Advanced Computing, Wuxi, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Mathematical Engineering and Advanced Computing, Wuxi, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hankang Fang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hankang Fang","raw_affiliation_strings":["Zhejiang Lab, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Lab, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090212980","display_name":"Yi Qin","orcid":"https://orcid.org/0000-0003-4351-2463"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Qin","raw_affiliation_strings":["Zhejiang Lab, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang Lab, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100352268","display_name":"Xin Liu","orcid":"https://orcid.org/0000-0002-7870-6535"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin Liu","raw_affiliation_strings":["National Research Centre of Parallel Computer Engineering and Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-7870-6535","affiliations":[{"raw_affiliation_string":"National Research Centre of Parallel Computer Engineering and Technology, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07396207,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"26","issue":"4","first_page":"605","last_page":"622"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14319","display_name":"Currency Recognition and Detection","score":0.97079998254776,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14319","display_name":"Currency Recognition and Detection","score":0.97079998254776,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9632999897003174,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11343","display_name":"Power Transformer Diagnostics and Insulation","score":0.954800009727478,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7227661609649658},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6307757496833801},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5413527488708496},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4298628866672516},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.366118848323822},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2953186631202698},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.19461220502853394},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1811756193637848},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.16680338978767395},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.06639665365219116}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7227661609649658},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6307757496833801},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5413527488708496},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4298628866672516},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.366118848323822},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2953186631202698},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.19461220502853394},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1811756193637848},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.16680338978767395},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.06639665365219116}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1631/fitee.2400453","is_oa":false,"landing_page_url":"https://doi.org/10.1631/fitee.2400453","pdf_url":null,"source":{"id":"https://openalex.org/S4210189857","display_name":"Frontiers of Information Technology & Electronic Engineering","issn_l":"2095-9184","issn":["2095-9184","2095-9230"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers of Information Technology &amp; Electronic Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1764493129","https://openalex.org/W1963958818","https://openalex.org/W2238700765","https://openalex.org/W2340076492","https://openalex.org/W2604319603","https://openalex.org/W2735645074","https://openalex.org/W2794532328","https://openalex.org/W2896457183","https://openalex.org/W2948767859","https://openalex.org/W2982050681","https://openalex.org/W2982219368","https://openalex.org/W2991330024","https://openalex.org/W3093977741","https://openalex.org/W3096835151","https://openalex.org/W3099576124","https://openalex.org/W3130716829","https://openalex.org/W3133174490","https://openalex.org/W3172198372","https://openalex.org/W3210432446","https://openalex.org/W3211525823","https://openalex.org/W4200416546","https://openalex.org/W4254648244","https://openalex.org/W4293149165","https://openalex.org/W4297639955","https://openalex.org/W4322718191","https://openalex.org/W6600577311","https://openalex.org/W6605299328","https://openalex.org/W6702248584","https://openalex.org/W6743235451","https://openalex.org/W6818723395"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4206178588","https://openalex.org/W3094491777","https://openalex.org/W3214715529","https://openalex.org/W4287635093"],"abstract_inverted_index":{"Transformer":[0],"models":[1,45,164],"have":[2],"become":[3],"a":[4,22,66,97,121,143],"cornerstone":[5],"of":[6,69,90,106,113,178],"various":[7],"natural":[8],"language":[9],"processing":[10,87],"(NLP)":[11],"tasks.":[12],"However,":[13],"the":[14,19,40,47,70,74,79,85,91,104,114,158,176],"substantial":[15],"computational":[16],"overhead":[17,42,71],"during":[18,152],"inference":[20,41,76,138,186],"remains":[21],"significant":[23],"challenge,":[24],"limiting":[25],"their":[26],"deployment":[27],"in":[28,43],"practical":[29],"applications.":[30],"In":[31],"this":[32,36],"study,":[33],"we":[34,64,83,119,141],"address":[35],"challenge":[37],"by":[38,59],"minimizing":[39],"transformer":[44,75,179],"using":[46,126],"controlling":[48],"element":[49,88],"on":[50,188],"artificial":[51],"intelligence":[52],"(AI)":[53],"accelerators.":[54,190],"Our":[55,172],"work":[56],"is":[57],"anchored":[58],"four":[60],"key":[61],"contributions.":[62],"First,":[63],"conduct":[65],"comprehensive":[67],"analysis":[68],"composition":[72],"within":[73],"process,":[77],"identifying":[78],"primary":[80],"bottlenecks.":[81],"Second,":[82],"leverage":[84],"management":[86,124],"(MPE)":[89],"Shenwei":[92],"AI":[93,189],"(SWAI)":[94],"accelerator,":[95],"implementing":[96],"three-tier":[98],"scheduling":[99],"framework":[100],"that":[101,148],"significantly":[102,130,174],"reduces":[103,131],"number":[105],"host-device":[107],"launches":[108],"to":[109,169],"approximately":[110],"1/10":[111],"000":[112],"original":[115],"PyTorch-GPU":[116],"setup.":[117],"Third,":[118],"introduce":[120],"zero-copy":[122],"memory":[123,132],"technique":[125],"segment-page":[127],"fusion,":[128],"which":[129],"access":[133],"latency":[134],"and":[135,155,184],"improves":[136],"overall":[137],"efficiency.":[139],"Finally,":[140],"develop":[142],"fast":[144],"model":[145,153],"loading":[146,160],"method":[147],"eliminates":[149],"redundant":[150],"computations":[151],"verification":[154],"initialization,":[156],"reducing":[157],"total":[159],"time":[161],"for":[162],"large":[163],"from":[165],"22":[166],"128.31":[167],"ms":[168],"1041.72":[170],"ms.":[171],"contributions":[173],"enhance":[175],"optimization":[177],"models,":[180],"enabling":[181],"more":[182],"efficient":[183],"expedited":[185],"processes":[187]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
