{"id":"https://openalex.org/W2966258469","doi":"https://doi.org/10.1177/1094342019866247","title":"SWIRL: High-performance many-core CPU code generation for deep neural networks","display_name":"SWIRL: High-performance many-core CPU code generation for deep neural networks","publication_year":2019,"publication_date":"2019-08-04","ids":{"openalex":"https://openalex.org/W2966258469","doi":"https://doi.org/10.1177/1094342019866247","mag":"2966258469"},"language":"en","primary_location":{"id":"doi:10.1177/1094342019866247","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342019866247","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078599914","display_name":"Anand Venkat","orcid":"https://orcid.org/0000-0002-4167-4525"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Anand Venkat","raw_affiliation_strings":["Parallel Computing Laboratory, Intel Labs, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Laboratory, Intel Labs, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108620835","display_name":"Tharindu Rusira","orcid":null},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tharindu Rusira","raw_affiliation_strings":["School of Computing, University of Utah, Salt Lake City, UT, USA"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Utah, Salt Lake City, UT, USA","institution_ids":["https://openalex.org/I223532165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102819808","display_name":"Rajkishore Barik","orcid":"https://orcid.org/0000-0003-4779-1391"},"institutions":[{"id":"https://openalex.org/I2946016260","display_name":"Uber AI (United States)","ror":"https://ror.org/05vm0ed18","country_code":"US","type":"company","lineage":["https://openalex.org/I2946016260"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Raj Barik","raw_affiliation_strings":["Uber Technologies Inc, CA, USA"],"affiliations":[{"raw_affiliation_string":"Uber Technologies Inc, CA, USA","institution_ids":["https://openalex.org/I2946016260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030152493","display_name":"Mary Hall","orcid":"https://orcid.org/0000-0002-3058-7573"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mary Hall","raw_affiliation_strings":["School of Computing, University of Utah, Salt Lake City, UT, USA"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Utah, Salt Lake City, UT, USA","institution_ids":["https://openalex.org/I223532165"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045835391","display_name":"Leonard Truong","orcid":"https://orcid.org/0000-0001-7583-9730"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Leonard Truong","raw_affiliation_strings":["Computer Science Department, Stanford University, CA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, Stanford University, CA, USA","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5078599914"],"corresponding_institution_ids":["https://openalex.org/I1343180700"],"apc_list":null,"apc_paid":null,"fwci":1.7208,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.87957638,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"33","issue":"6","first_page":"1275","last_page":"1289"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8802586793899536},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5334959626197815},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5117626190185547},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5021202564239502},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.47906360030174255},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.4657493531703949},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.436065673828125},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4323021173477173},{"id":"https://openalex.org/keywords/central-processing-unit","display_name":"Central processing unit","score":0.4215809106826782},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4203081727027893},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2745712995529175},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.16301792860031128}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8802586793899536},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5334959626197815},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5117626190185547},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5021202564239502},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.47906360030174255},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.4657493531703949},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.436065673828125},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4323021173477173},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.4215809106826782},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4203081727027893},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2745712995529175},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.16301792860031128}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1177/1094342019866247","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342019866247","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320310050","display_name":"University of Utah","ror":"https://ror.org/03r0ha626"},{"id":"https://openalex.org/F4320320698","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1884620995","https://openalex.org/W2085105049","https://openalex.org/W2117539524","https://openalex.org/W2121546953","https://openalex.org/W2155893237","https://openalex.org/W2257979135","https://openalex.org/W2412412865","https://openalex.org/W2518955564","https://openalex.org/W2604272474","https://openalex.org/W2750586355","https://openalex.org/W2805622899","https://openalex.org/W2921932275","https://openalex.org/W2962780982","https://openalex.org/W4244568863","https://openalex.org/W4251637954","https://openalex.org/W6601630192"],"related_works":["https://openalex.org/W1497796367","https://openalex.org/W1483098478","https://openalex.org/W2129123431","https://openalex.org/W2161462353","https://openalex.org/W2003609199","https://openalex.org/W2119534391","https://openalex.org/W2794923745","https://openalex.org/W2075046026","https://openalex.org/W2350330939","https://openalex.org/W1464113540"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"networks":[2],"(DNNs)":[3],"have":[4,80],"demonstrated":[5],"effectiveness":[6],"in":[7,25,146],"many":[8],"domains":[9],"including":[10],"object":[11],"recognition,":[12,14],"speech":[13],"natural":[15],"language":[16,123],"processing,":[17],"and":[18,28,33,48,133,142,160,187,203,207],"health":[19],"care.":[20],"Typically,":[21],"the":[22,56,81,92,120,195],"computations":[23],"involved":[24],"DNN":[26,61,131,147],"training":[27],"inferencing":[29],"are":[30],"time":[31],"consuming":[32],"require":[34],"efficient":[35],"implementations.":[36],"Existing":[37],"frameworks":[38,71],"such":[39],"as":[40,55],"TensorFlow,":[41],"Theano,":[42],"Torch,":[43],"Cognitive":[44],"Tool":[45],"Kit":[46],"(CNTK),":[47],"Caffe":[49],"enable":[50],"Graphics":[51],"Processing":[52,65],"Unit":[53,66],"(GPUs)":[54],"status":[57],"quo":[58],"devices":[59],"for":[60,112,125,140,163],"execution,":[62],"leaving":[63],"Central":[64],"(CPUs)":[67],"behind.":[68],"Moreover,":[69],"existing":[70,121],"forgo":[72],"or":[73],"limit":[74],"cross":[75],"layer":[76],"optimization":[77],"opportunities":[78],"that":[79,107],"potential":[82],"to":[83,155],"improve":[84],"performance":[85,174],"by":[86,201],"significantly":[87],"reducing":[88],"data":[89],"movement":[90],"through":[91],"memory":[93],"hierarchy.":[94],"In":[95],"this":[96],"article,":[97],"we":[98],"describe":[99],"an":[100,166],"alternative":[101],"approach":[102],"called":[103,127],"SWIRL,":[104],"a":[105],"compiler":[106,198],"provides":[108],"high-performance":[109],"CPU":[110],"implementations":[111],"DNNs.":[113],"SWIRL":[114,129,172],"is":[115],"built":[116],"on":[117,181,199,205],"top":[118],"of":[119,184,189],"domain-specific":[122],"(DSL)":[124],"DNNs":[126],"LATTE.":[128],"separates":[130],"specification":[132],"its":[134],"schedule":[135],"using":[136],"predefined":[137],"transformation":[138],"recipes":[139,150],"tensors":[141],"layers":[143],"commonly":[144],"found":[145],"layers.":[148],"These":[149],"synergize":[151],"with":[152,176,179],"DSL":[153],"constructs":[154],"generate":[156],"high-quality":[157],"fused,":[158],"vectorized,":[159],"parallelized":[161],"code":[162],"CPUs.":[164],"On":[165],"Intel":[167],"Xeon":[168],"Platinum":[169],"8180M":[170],"CPU,":[171],"achieves":[173],"comparable":[175],"Tensorflow":[177,185,190],"integrated":[178],"MKL-DNN;":[180],"average":[182,200],"1.00\u00d7":[183],"inference":[186,206],"0.99\u00d7":[188],"training.":[191],"It":[192],"also":[193],"outperforms":[194],"original":[196],"LATTE":[197],"1.22\u00d7":[202],"1.30\u00d7":[204],"training,":[208],"respectively.":[209]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
