{"id":"https://openalex.org/W2962871385","doi":"https://doi.org/10.1145/3337821.3337883","title":"swATOP","display_name":"swATOP","publication_year":2019,"publication_date":"2019-07-25","ids":{"openalex":"https://openalex.org/W2962871385","doi":"https://doi.org/10.1145/3337821.3337883","mag":"2962871385"},"language":"en","primary_location":{"id":"doi:10.1145/3337821.3337883","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3337821.3337883","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101761894","display_name":"Wei Gao","orcid":"https://orcid.org/0009-0003-5952-3922"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wei Gao","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069911612","display_name":"Jiarui Fang","orcid":"https://orcid.org/0000-0002-6724-2763"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiarui Fang","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015849734","display_name":"Wenlai Zhao","orcid":"https://orcid.org/0000-0003-1036-4732"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenlai Zhao","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077293910","display_name":"Jinzhe Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jinzhe Yang","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108688180","display_name":"Long Wang","orcid":"https://orcid.org/0009-0008-4103-1782"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Wang","raw_affiliation_strings":["System Department of Baidu"],"affiliations":[{"raw_affiliation_string":"System Department of Baidu","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038750675","display_name":"Lin Gan","orcid":"https://orcid.org/0000-0003-1297-4462"},"institutions":[{"id":"https://openalex.org/I4210158984","display_name":"National Supercomputing Center in Wuxi","ror":"https://ror.org/04ypjrs34","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210158984"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Gan","raw_affiliation_strings":["Tsinghua University, National Supercomputing Center in Wuxi"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, National Supercomputing Center in Wuxi","institution_ids":["https://openalex.org/I4210158984","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031545295","display_name":"Haohuan Fu","orcid":"https://orcid.org/0000-0002-6982-2235"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I4210158984","display_name":"National Supercomputing Center in Wuxi","ror":"https://ror.org/04ypjrs34","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210158984"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haohuan Fu","raw_affiliation_strings":["Tsinghua University, National Supercomputing Center in Wuxi"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, National Supercomputing Center in Wuxi","institution_ids":["https://openalex.org/I4210158984","https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5115591592","display_name":"Guangwen Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I4210158984","display_name":"National Supercomputing Center in Wuxi","ror":"https://ror.org/04ypjrs34","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210158984"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangwen Yang","raw_affiliation_strings":["Tsinghua University National Supercomputing Center in Wuxi"],"affiliations":[{"raw_affiliation_string":"Tsinghua University National Supercomputing Center in Wuxi","institution_ids":["https://openalex.org/I4210158984","https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101761894"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.2408,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.48056353,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7902321815490723},{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.7326951026916504},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5655694007873535},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5165830850601196},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.4882200062274933},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.47377634048461914},{"id":"https://openalex.org/keywords/program-optimization","display_name":"Program optimization","score":0.4673215448856354},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.46370038390159607},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4448099434375763},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.44370728731155396},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.4244115352630615},{"id":"https://openalex.org/keywords/domain-specific-language","display_name":"Domain-specific language","score":0.41549280285835266},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4100019335746765},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.37291771173477173},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3691123127937317},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.34787023067474365},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.23711815476417542},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.19434407353401184},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18342086672782898}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7902321815490723},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.7326951026916504},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5655694007873535},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5165830850601196},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.4882200062274933},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.47377634048461914},{"id":"https://openalex.org/C139571649","wikidata":"https://www.wikidata.org/wiki/Q1156793","display_name":"Program optimization","level":3,"score":0.4673215448856354},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.46370038390159607},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4448099434375763},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.44370728731155396},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.4244115352630615},{"id":"https://openalex.org/C135257023","wikidata":"https://www.wikidata.org/wiki/Q691358","display_name":"Domain-specific language","level":2,"score":0.41549280285835266},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4100019335746765},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.37291771173477173},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3691123127937317},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.34787023067474365},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.23711815476417542},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.19434407353401184},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18342086672782898},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3337821.3337883","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3337821.3337883","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1530262073","https://openalex.org/W1667652561","https://openalex.org/W1686810756","https://openalex.org/W2034761517","https://openalex.org/W2051305716","https://openalex.org/W2055312318","https://openalex.org/W2077143534","https://openalex.org/W2135653967","https://openalex.org/W2136952590","https://openalex.org/W2153185479","https://openalex.org/W2172654076","https://openalex.org/W2194775991","https://openalex.org/W2294854295","https://openalex.org/W2521727659","https://openalex.org/W2726914369","https://openalex.org/W2728256789","https://openalex.org/W2753495321","https://openalex.org/W2786320458","https://openalex.org/W2804032941","https://openalex.org/W2885356209","https://openalex.org/W2899133614","https://openalex.org/W2935753164","https://openalex.org/W2963037989","https://openalex.org/W3151489216","https://openalex.org/W4251637954"],"related_works":["https://openalex.org/W1560708224","https://openalex.org/W301619107","https://openalex.org/W1964587130","https://openalex.org/W2475659450","https://openalex.org/W1534985845","https://openalex.org/W2794657471","https://openalex.org/W2079017796","https://openalex.org/W4236935678","https://openalex.org/W1970499202","https://openalex.org/W1531339993"],"abstract_inverted_index":{"Achieving":[0],"an":[1,55,79,109,143,148,159],"optimized":[2],"mapping":[3],"of":[4,33,92,126,132,168,200],"Deep":[5],"Learning":[6],"(DL)":[7],"operators":[8,75,196],"to":[9,16,30,105,157,164,171,177,189,208,221],"new":[10],"hardware":[11,135],"architectures":[12],"is":[13,61,83,103,121,187],"the":[14,31,42,49,90,133,179,204,212],"key":[15],"building":[17],"a":[18,64,93,124,141,151,166,173,209],"scalable":[19],"DL":[20,34,69,74,94,195],"system.":[21],"However,":[22],"handcrafted":[23],"optimization":[24,115,120,139],"involves":[25],"huge":[26],"engineering":[27],"efforts,":[28],"due":[29],"variety":[32],"operator":[35,70,95],"implementations":[36],"and":[37,107,116,150,176,214],"complex":[38],"programming":[39,169],"skills.":[40],"Targeting":[41],"innovative":[43],"many-core":[44],"processor":[45],"SW26010":[46],"adopted":[47],"by":[48,112],"3rd":[50],"fastest":[51],"supercomputer":[52],"Sunway":[53],"TaihuLight,":[54],"end-to-end":[56],"automated":[57],"framework":[58],"called":[59],"swATOP":[60,102,186],"presented":[62],"as":[63],"more":[65],"practical":[66],"solution":[67],"for":[68],"optimization.":[71,118,206],"Arithmetic":[72],"intensive":[73],"are":[76],"expressed":[77],"into":[78],"auto-tuning-friendly":[80],"form,":[81],"which":[82],"based":[84],"on":[85,194],"tensorized":[86,127],"primitives.":[87],"By":[88],"describing":[89],"algorithm":[91],"using":[96,225],"our":[97],"domain":[98],"specific":[99],"language":[100],"(DSL),":[101],"able":[104,188],"derive":[106],"produce":[108],"optimal":[110],"implementation":[111],"separating":[113],"hardware-dependent":[114],"hardware-agnostic":[117,138],"Hardware-dependent":[119],"encapsulated":[122],"in":[123,197],"set":[125,167],"primitives":[128],"with":[129,203],"sufficient":[130],"utilization":[131],"underlying":[134],"features.":[136],"The":[137],"contains":[140],"scheduler,":[142],"intermediate":[144],"representation":[145],"(IR)":[146],"optimizer,":[147],"auto-tuner,":[149],"code":[152,215],"generator.":[153],"These":[154],"modules":[155],"cooperate":[156],"perform":[158],"automatic":[160],"design":[161],"space":[162],"exploration,":[163],"apply":[165],"techniques,":[170],"discover":[172],"near-optimal":[174],"solution,":[175],"generate":[178],"executable":[180],"code.":[181],"Our":[182],"experiments":[183],"show":[184],"that":[185],"bring":[190],"significant":[191],"performance":[192],"improvement":[193],"over":[198],"88%":[199],"cases,":[201],"compared":[202],"best-handcrafted":[205],"Compared":[207],"black-box":[210],"autotuner,":[211],"tuning":[213],"generation":[216],"time":[217],"can":[218],"be":[219],"reduced":[220],"minutes":[222],"from":[223],"days":[224],"swATOP.":[226]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2019-07-30T00:00:00"}
