{"id":"https://openalex.org/W2076246374","doi":"https://doi.org/10.1145/2716282.2716289","title":"Stochastic gradient descent on GPUs","display_name":"Stochastic gradient descent on GPUs","publication_year":2015,"publication_date":"2015-02-03","ids":{"openalex":"https://openalex.org/W2076246374","doi":"https://doi.org/10.1145/2716282.2716289","mag":"2076246374"},"language":"en","primary_location":{"id":"doi:10.1145/2716282.2716289","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2716282.2716289","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th Workshop on General Purpose Processing using GPUs","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011021816","display_name":"Rashid Kaleem","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Rashid Kaleem","raw_affiliation_strings":["University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006053754","display_name":"Sreepathi Pai","orcid":"https://orcid.org/0000-0002-3691-7238"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sreepathi Pai","raw_affiliation_strings":["University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013181067","display_name":"Keshav Pingali","orcid":"https://orcid.org/0000-0002-0484-4636"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Keshav Pingali","raw_affiliation_strings":["University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5011021816"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":10.6823,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.98232054,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"81","last_page":"89"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8866315484046936},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7042500972747803},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.6866182088851929},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.616611897945404},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5919262170791626},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.5488855242729187},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.4337725043296814},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.39633283019065857},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.13495948910713196},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.12261644005775452},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10794377326965332}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8866315484046936},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7042500972747803},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.6866182088851929},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.616611897945404},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5919262170791626},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.5488855242729187},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.4337725043296814},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.39633283019065857},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.13495948910713196},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.12261644005775452},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10794377326965332},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2716282.2716289","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2716282.2716289","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th Workshop on General Purpose Processing using GPUs","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.696.3557","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.696.3557","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://iss.ices.utexas.edu/Publications/Papers/kaleem-2015-SGD-GPU.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W146900863","https://openalex.org/W1768849904","https://openalex.org/W2011039300","https://openalex.org/W2018934112","https://openalex.org/W2062140606","https://openalex.org/W2071039340","https://openalex.org/W2094233035","https://openalex.org/W2098903349","https://openalex.org/W2120432001","https://openalex.org/W2159350554","https://openalex.org/W2168231600","https://openalex.org/W2963113252","https://openalex.org/W4232836277"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W4312814274","https://openalex.org/W1590307681","https://openalex.org/W2536018345","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W2358353312","https://openalex.org/W2149245348"],"abstract_inverted_index":{"Irregular":[0],"algorithms":[1],"such":[2],"as":[3],"Stochastic":[4],"Gradient":[5],"Descent":[6],"(SGD)":[7],"can":[8,96,127],"benefit":[9],"from":[10,45],"the":[11,63,75,78,83,89,112,121,137,142],"massive":[12],"parallelism":[13],"available":[14],"on":[15],"GPUs.":[16],"However,":[17],"unlike":[18],"in":[19,24],"data-parallel":[20],"algorithms,":[21],"synchronization":[22,40],"patterns":[23],"SGD":[25],"are":[26],"quite":[27],"complex.":[28],"Furthermore,":[29],"scheduling":[30,102],"for":[31,42,108,124],"scale-free":[32],"graphs":[33],"is":[34],"challenging.":[35],"This":[36,94],"work":[37],"examines":[38],"several":[39],"strategies":[41,103],"SGD,":[43],"ranging":[44],"simple":[46],"locking":[47],"to":[48,65,81,99,114,130,161,172],"conflict-free":[49],"scheduling.":[50],"We":[51,73,118],"observe":[52],"that":[53,120],"static":[54,168],"schedules":[55,107,158],"do":[56],"not":[57],"yield":[58],"better":[59,116],"performance":[60,79,143],"despite":[61],"eliminating":[62],"need":[64],"perform":[66,159],"conflict":[67],"detection":[68],"and":[69],"resolution":[70],"at":[71],"runtime.":[72],"identify":[74],"source":[76],"of":[77,85,88,111,133,144,155],"degradation":[80],"be":[82,97,128],"structure":[84],"certain":[86],"parts":[87],"graph":[90,113],"(dense":[91],"vs":[92],"sparse).":[93],"classification":[95],"used":[98],"devise":[100],"hybrid":[101],"which":[104],"exploit":[105],"different":[106,109],"regions":[110],"obtain":[115],"performance.":[117],"found":[119],"best":[122],"schedule":[123,169],"some":[125],"problems":[126],"up":[129],"two":[131],"orders":[132],"magnitude":[134],"faster":[135],"than":[136],"worst":[138],"one.":[139],"To":[140],"evaluate":[141],"our":[145],"GPU":[146],"implementation,":[147,165],"we":[148],"also":[149],"compare":[150],"against":[151],"a":[152,162,167,173],"CPU":[153,164,175],"implementation":[154],"SGD.":[156],"Dynamic":[157],"comparably":[160,171],"14-thread":[163],"while":[166],"performs":[170],"6-thread":[174],"implementation.":[176]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":11},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
