{"id":"https://openalex.org/W4394998505","doi":"https://doi.org/10.1145/3620665.3640367","title":"A Journey of a 1,000 Kernels Begins with a Single Step: A Retrospective of Deep Learning on GPUs","display_name":"A Journey of a 1,000 Kernels Begins with a Single Step: A Retrospective of Deep Learning on GPUs","publication_year":2024,"publication_date":"2024-04-22","ids":{"openalex":"https://openalex.org/W4394998505","doi":"https://doi.org/10.1145/3620665.3640367"},"language":"en","primary_location":{"id":"doi:10.1145/3620665.3640367","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640367","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3620665.3640367","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101789152","display_name":"Michael Davies","orcid":"https://orcid.org/0009-0004-1588-9604"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Michael Davies","raw_affiliation_strings":["University of Wisconsin-Madison, Madison, Wisconsin, USA"],"raw_orcid":"https://orcid.org/0009-0004-1588-9604","affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, Madison, Wisconsin, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050963353","display_name":"Ian McDougall","orcid":"https://orcid.org/0009-0005-4339-7233"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ian McDougall","raw_affiliation_strings":["University of Wisconsin-Madison, Madison, Wisconsin, United States of America"],"raw_orcid":"https://orcid.org/0009-0005-4339-7233","affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, Madison, Wisconsin, United States of America","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101421859","display_name":"Selvaraj Anandaraj","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Selvaraj Anandaraj","raw_affiliation_strings":["University of Wisconsin-Madison, Madison, Wisconsin, USA"],"raw_orcid":"https://orcid.org/0009-0007-0815-9090","affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, Madison, Wisconsin, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5095829529","display_name":"Deep Machchhar","orcid":"https://orcid.org/0009-0004-0826-7874"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Deep Machchhar","raw_affiliation_strings":["University of Wisconsin-Madison, Madison, Wisconsin, United States of America"],"raw_orcid":"https://orcid.org/0009-0004-0826-7874","affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, Madison, Wisconsin, United States of America","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048715475","display_name":"Rithik Jain","orcid":"https://orcid.org/0009-0008-7953-6786"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rithik Jain","raw_affiliation_strings":["University of Wisconsin-Madison, Madison, Wisconsin, United States of America"],"raw_orcid":"https://orcid.org/0009-0008-7953-6786","affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, Madison, Wisconsin, United States of America","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028943049","display_name":"Karthikeyan Sankaralingam","orcid":"https://orcid.org/0000-0002-8315-2389"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Karthikeyan Sankaralingam","raw_affiliation_strings":["University of Wisconsin-Madison, Madison, Wisconsin, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-8315-2389","affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, Madison, Wisconsin, United States of America","institution_ids":["https://openalex.org/I135310074"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101789152"],"corresponding_institution_ids":["https://openalex.org/I135310074"],"apc_list":null,"apc_paid":null,"fwci":1.4285,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.82023926,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"20","last_page":"36"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.8526797890663147},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7911789417266846},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6770391464233398},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6315295100212097},{"id":"https://openalex.org/keywords/microarchitecture","display_name":"Microarchitecture","score":0.6295081973075867},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5631611943244934},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5015254020690918},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.45736297965049744},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.41115936636924744},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3798239827156067},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3426705598831177},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3150416612625122},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18399277329444885}],"concepts":[{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.8526797890663147},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7911789417266846},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6770391464233398},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6315295100212097},{"id":"https://openalex.org/C107598950","wikidata":"https://www.wikidata.org/wiki/Q259864","display_name":"Microarchitecture","level":2,"score":0.6295081973075867},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5631611943244934},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5015254020690918},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.45736297965049744},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.41115936636924744},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3798239827156067},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3426705598831177},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3150416612625122},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18399277329444885},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3620665.3640367","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640367","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3620665.3640367","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640367","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1427776965","display_name":null,"funder_award_id":"DGE-2137424","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W2110195531","https://openalex.org/W2113558024","https://openalex.org/W2163687928","https://openalex.org/W2624431344","https://openalex.org/W2761132374","https://openalex.org/W2791673912","https://openalex.org/W2800017313","https://openalex.org/W2899283552","https://openalex.org/W2900167092","https://openalex.org/W2901299405","https://openalex.org/W2907701003","https://openalex.org/W2926767350","https://openalex.org/W2935480346","https://openalex.org/W2940862705","https://openalex.org/W2963358710","https://openalex.org/W2963566954","https://openalex.org/W2963989532","https://openalex.org/W2964243274","https://openalex.org/W2969388332","https://openalex.org/W2979719709","https://openalex.org/W2989847975","https://openalex.org/W3004659153","https://openalex.org/W3017228913","https://openalex.org/W3021029305","https://openalex.org/W3038827379","https://openalex.org/W3040646053","https://openalex.org/W3043303806","https://openalex.org/W3043571714","https://openalex.org/W3081168214","https://openalex.org/W3096720818","https://openalex.org/W3101708369","https://openalex.org/W3118210634","https://openalex.org/W3125884253","https://openalex.org/W3129093240","https://openalex.org/W3130554079","https://openalex.org/W3138516171","https://openalex.org/W3154028478","https://openalex.org/W3165926952","https://openalex.org/W3173154111","https://openalex.org/W3173358825","https://openalex.org/W3174086521","https://openalex.org/W3177277791","https://openalex.org/W3190062760","https://openalex.org/W3192336523","https://openalex.org/W3203806429","https://openalex.org/W4200150166","https://openalex.org/W4287363917","https://openalex.org/W4287748555","https://openalex.org/W4293254947","https://openalex.org/W4309672181","https://openalex.org/W4312968147","https://openalex.org/W6745245109","https://openalex.org/W6757794950","https://openalex.org/W6774508117","https://openalex.org/W6778883912","https://openalex.org/W6782855247","https://openalex.org/W6783741718"],"related_works":["https://openalex.org/W4379115841","https://openalex.org/W2083794993","https://openalex.org/W1511772879","https://openalex.org/W4394660363","https://openalex.org/W2186315912","https://openalex.org/W2248125223","https://openalex.org/W2588591308","https://openalex.org/W2910542634","https://openalex.org/W2127898439","https://openalex.org/W3082894236"],"abstract_inverted_index":{"We":[0,105],"are":[1],"in":[2,15],"age":[3],"of":[4,49,53,62,73,94,109],"AI,":[5],"with":[6,143],"rapidly":[7],"changing":[8],"algorithms":[9],"and":[10,30,44,57,84,87,111,130,132],"a":[11,19,26,46,60,70],"somewhat":[12],"synergistic":[13],"change":[14],"hardware.":[16,32],"MLPerf":[17],"is":[18,40],"recent":[20],"benchmark":[21],"suite":[22],"that":[23],"serves":[24],"as":[25],"way":[27],"to":[28,96,145],"compare":[29],"evaluate":[31],"However":[33],"it":[34,39],"has":[35,102],"several":[36],"drawbacks":[37],"-":[38],"dominated":[41],"by":[42],"CNNs":[43],"does":[45],"poor":[47],"job":[48],"capturing":[50],"the":[51,99,114,124,140],"diversity":[52],"AI":[54,64,75,100],"use":[55,65],"cases,":[56],"only":[58],"represents":[59],"sliver":[61],"production":[63],"cases.":[66],"This":[67],"paper":[68,118,136],"performs":[69],"longitudinal":[71],"study":[72],"state-of-art":[74],"applications":[76,110],"spanning":[77],"vision,":[78],"physical":[79],"simulation,":[80],"vision":[81],"synthesis,":[82],"language":[83],"speech":[85],"processing,":[86,90],"tabular":[88],"data":[89,121],"across":[91],"three":[92],"generations":[93],"hardware":[95,131],"understand":[97],"how":[98],"revolution":[101,142],"panned":[103],"out.":[104],"call":[106],"this":[107],"collection":[108],"execution":[112],"scaffolding":[113],"CaSiO":[115],"suite.":[116],"The":[117,135],"reports":[119],"on":[120,139],"gathered":[122],"at":[123],"framework":[125],"level,":[126,129],"device":[127],"API":[128],"microarchitecture":[133],"level.":[134],"provides":[137],"insights":[138],"hardware-software":[141],"pointers":[144],"future":[146],"trends.":[147]},"counts_by_year":[{"year":2025,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
