{"id":"https://openalex.org/W2277932597","doi":"https://doi.org/10.1145/2851141.2851158","title":"Coarse grain parallelization of deep neural networks","display_name":"Coarse grain parallelization of deep neural networks","publication_year":2016,"publication_date":"2016-02-22","ids":{"openalex":"https://openalex.org/W2277932597","doi":"https://doi.org/10.1145/2851141.2851158","mag":"2277932597"},"language":"en","primary_location":{"id":"doi:10.1145/2851141.2851158","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2851141.2851158","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036427962","display_name":"Marc Gonz\u00e1lez","orcid":"https://orcid.org/0000-0002-3780-1106"},"institutions":[{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Marc Gonzalez Tallada","raw_affiliation_strings":["Universitat Politecnica de Catalunya-BarcelonaTech"],"affiliations":[{"raw_affiliation_string":"Universitat Politecnica de Catalunya-BarcelonaTech","institution_ids":["https://openalex.org/I9617848"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5036427962"],"corresponding_institution_ids":["https://openalex.org/I9617848"],"apc_list":null,"apc_paid":null,"fwci":1.6946,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.8901048,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8383301496505737},{"id":"https://openalex.org/keywords/mnist-database","display_name":"MNIST database","score":0.8304198980331421},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6710037589073181},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6395680904388428},{"id":"https://openalex.org/keywords/automatic-parallelization","display_name":"Automatic parallelization","score":0.6251869201660156},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.5636516809463501},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4939900040626526},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.46751123666763306},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.43949806690216064},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4201710820198059},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.41119030117988586},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39731961488723755},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3331323564052582},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.14375066757202148}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8383301496505737},{"id":"https://openalex.org/C190502265","wikidata":"https://www.wikidata.org/wiki/Q17069496","display_name":"MNIST database","level":3,"score":0.8304198980331421},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6710037589073181},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6395680904388428},{"id":"https://openalex.org/C164833996","wikidata":"https://www.wikidata.org/wiki/Q2323839","display_name":"Automatic parallelization","level":3,"score":0.6251869201660156},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.5636516809463501},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4939900040626526},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.46751123666763306},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.43949806690216064},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4201710820198059},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.41119030117988586},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39731961488723755},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3331323564052582},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.14375066757202148},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2851141.2851158","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2851141.2851158","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"},{"id":"pmh:oai:upcommons.upc.edu:2117/104446","is_oa":false,"landing_page_url":"http://hdl.handle.net/2117/104446","pdf_url":null,"source":{"id":"https://openalex.org/S4377196262","display_name":"UPCommons institutional repository (Universitat Polit\u00e8cnica de Catalunya)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9617848","host_organization_name":"Universitat Polit\u00e8cnica de Catalunya","host_organization_lineage":["https://openalex.org/I9617848"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W753012316","https://openalex.org/W1442374986","https://openalex.org/W1667652561","https://openalex.org/W1849277567","https://openalex.org/W1922655562","https://openalex.org/W2007339694","https://openalex.org/W2085924714","https://openalex.org/W2112796928","https://openalex.org/W2113651538","https://openalex.org/W2117539524","https://openalex.org/W2120432001","https://openalex.org/W2130639013","https://openalex.org/W2141125852","https://openalex.org/W2146502635","https://openalex.org/W2149381887","https://openalex.org/W2155893237","https://openalex.org/W2162390675","https://openalex.org/W2163605009","https://openalex.org/W2168231600","https://openalex.org/W2184045248","https://openalex.org/W2310919327","https://openalex.org/W2604272474","https://openalex.org/W2936995161","https://openalex.org/W2952186574","https://openalex.org/W2990138404","https://openalex.org/W3118608800","https://openalex.org/W4246869989","https://openalex.org/W4299828299","https://openalex.org/W6636358008","https://openalex.org/W6677106874","https://openalex.org/W6683722107"],"related_works":["https://openalex.org/W3102660566","https://openalex.org/W2618574054","https://openalex.org/W4297942731","https://openalex.org/W4385524141","https://openalex.org/W3205838256","https://openalex.org/W3018979822","https://openalex.org/W3026616975","https://openalex.org/W4288018014","https://openalex.org/W4297776111","https://openalex.org/W2989784533"],"abstract_inverted_index":{"Deep":[0],"neural":[1,40],"networks":[2],"(DNN)":[3],"have":[4],"recently":[5],"achieved":[6,65],"extraordinary":[7],"results":[8,144],"in":[9,33,114,185],"domains":[10],"like":[11],"computer":[12,148],"vision":[13,149],"and":[14,47,52,82,129,152],"speech":[15],"recognition.":[16],"An":[17],"essential":[18],"element":[19],"for":[20,91,126,145],"this":[21],"success":[22],"has":[23,111],"been":[24,112],"the":[25,34,39,45,57,67,70,78,86,93,105,123,127,133,138,169,180],"introduction":[26],"of":[27,37,49,56,69,80,137,166,176],"high":[28],"performance":[29,135,143,174],"computing":[30],"(HPC)":[31],"techniques":[32],"critical":[35],"step":[36],"training":[38,59],"network.":[41],"This":[42,73],"paper":[43,121],"describes":[44,122],"implementation":[46],"analysis":[48],"a":[50,116,157,186],"network-agnostic":[51],"convergence-invariant":[53],"coarse-grain":[54,62],"parallelization":[55,63,110,128],"DNN":[58,94,118],"algorithm.":[60],"The":[61,96,109,120],"is":[64,75,88,98],"through":[66],"exploitation":[68],"batch-level":[71],"parallelism.":[72],"strategy":[74],"independent":[76],"from":[77],"support":[79],"specialized":[81],"optimized":[83,182],"libraries.":[84],"Therefore,":[85],"optimization":[87],"immediately":[89],"available":[90],"accelerating":[92],"training.":[95],"proposal":[97],"compatible":[99],"with":[100],"multi-GPU":[101],"execution":[102],"without":[103],"altering":[104],"algorithm":[106],"convergence":[107],"rate.":[108],"implemented":[113],"Caffe,":[115],"state-of-the-art":[117,147],"framework.":[119],"code":[124],"transformations":[125],"we":[130,163],"also":[131],"identify":[132],"limiting":[134],"factors":[136],"approach.":[139],"We":[140],"show":[141],"competitive":[142],"two":[146],"datasets,":[150],"MNIST":[151],"CIFAR-10.":[153],"In":[154],"particular,":[155],"on":[156],"16-core":[158],"Xeon":[159],"E5-2667v2":[160],"at":[161,172],"3.30GHz":[162],"observe":[164],"speedups":[165],"8\u00d7":[167],"over":[168],"sequential":[170],"execution,":[171],"similar":[173],"levels":[175],"those":[177],"obtained":[178],"by":[179],"GPU":[181],"Caffe":[183],"version":[184],"NVIDIA":[187],"K40":[188],"GPU.":[189]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
