{"id":"https://openalex.org/W2580688187","doi":"https://doi.org/10.1145/3018743.3018769","title":"S-Caffe","display_name":"S-Caffe","publication_year":2017,"publication_date":"2017-01-26","ids":{"openalex":"https://openalex.org/W2580688187","doi":"https://doi.org/10.1145/3018743.3018769","mag":"2580688187"},"language":"en","primary_location":{"id":"doi:10.1145/3018743.3018769","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3018743.3018769","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004330728","display_name":"Ammar Ahmad Awan","orcid":"https://orcid.org/0000-0002-6272-3760"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ammar Ahmad Awan","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048039700","display_name":"Khaled Hamidouche","orcid":"https://orcid.org/0000-0003-4836-5335"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Khaled Hamidouche","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008298485","display_name":"Jahanzeb Maqbool Hashmi","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jahanzeb Maqbool Hashmi","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024879682","display_name":"Dhabaleswar K. Panda","orcid":"https://orcid.org/0000-0002-0356-1781"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dhabaleswar K. Panda","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5004330728"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":8.7112,"has_fulltext":false,"cited_by_count":135,"citation_normalized_percentile":{"value":0.98557254,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"193","last_page":"205"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.8716202974319458},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.802756667137146},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.772154688835144},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7481480240821838},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5600823163986206},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5330079197883606},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.507117509841919},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4756048619747162},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4276273250579834},{"id":"https://openalex.org/keywords/gpu-cluster","display_name":"GPU cluster","score":0.4240390658378601},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.42309391498565674},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1472395956516266}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.8716202974319458},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.802756667137146},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.772154688835144},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7481480240821838},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5600823163986206},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5330079197883606},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.507117509841919},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4756048619747162},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4276273250579834},{"id":"https://openalex.org/C2781335571","wikidata":"https://www.wikidata.org/wiki/Q2633544","display_name":"GPU cluster","level":3,"score":0.4240390658378601},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.42309391498565674},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1472395956516266},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3018743.3018769","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3018743.3018769","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1442374986","https://openalex.org/W1548328233","https://openalex.org/W1583837637","https://openalex.org/W1598866093","https://openalex.org/W1606347560","https://openalex.org/W1686810756","https://openalex.org/W2062764889","https://openalex.org/W2078391824","https://openalex.org/W2097117768","https://openalex.org/W2108598243","https://openalex.org/W2155893237","https://openalex.org/W2162390675","https://openalex.org/W2163605009","https://openalex.org/W2165808215","https://openalex.org/W2168231600","https://openalex.org/W2170135819","https://openalex.org/W2172734211","https://openalex.org/W2178615544","https://openalex.org/W2215673219","https://openalex.org/W2253535924","https://openalex.org/W2271840356","https://openalex.org/W2339765813","https://openalex.org/W2474421929","https://openalex.org/W2604272474","https://openalex.org/W3118608800"],"related_works":["https://openalex.org/W2983282793","https://openalex.org/W1973046741","https://openalex.org/W3138386522","https://openalex.org/W2499279132","https://openalex.org/W2056543843","https://openalex.org/W2056717482","https://openalex.org/W2030707850","https://openalex.org/W2170611190","https://openalex.org/W2566934642","https://openalex.org/W2548246577"],"abstract_inverted_index":{"Availability":[0],"of":[1,34,86,97,105,126,188,196,220,254],"large":[2],"data":[3,132],"sets":[4],"like":[5,16,37],"ImageNet":[6],"and":[7,41,61,74,95,109,128,134,156,166,222,230],"massively":[8],"parallel":[9],"computation":[10,127],"support":[11],"in":[12,24],"modern":[13,79],"HPC":[14,63],"devices":[15],"NVIDIA":[17],"GPUs":[18,181],"have":[19,47],"fueled":[20],"a":[21,51,72,103,147,160,186],"renewed":[22],"interest":[23],"Deep":[25],"Learning":[26],"(DL)":[27],"algorithms.":[28],"This":[29],"has":[30],"triggered":[31],"the":[32,66,92,106,110,115,124,142,194,201,245,252],"development":[33],"DL":[35,45,59,67,93],"frameworks":[36,46,60,94],"Caffe,":[38],"Torch,":[39],"TensorFlow,":[40],"CNTK.":[42,256],"However,":[43],"most":[44],"been":[48],"limited":[49],"to":[50,56,65,122,141,159,178,207,239,251],"single":[52,213],"node.":[53],"In":[54,234],"order":[55],"scale":[57],"out":[58],"bring":[62,139],"capabilities":[64],"arena,":[68],"we":[69,101,118],"propose,":[70],"S-Caffe;":[71],"scalable":[73],"distributed":[75],"Caffe":[76,107,227],"adaptation":[77],"for":[78,171,182,212,228,244],"multi-GPU":[80],"clusters.":[81],"With":[82],"an":[83,218],"in-depth":[84],"analysis":[85],"new":[87],"requirements":[88],"brought":[89],"forward":[90],"by":[91,145],"limitations":[96],"current":[98],"communication":[99,129],"runtimes,":[100],"present":[102],"co-design":[104,116],"framework":[108,203],"MVAPICH2-GDR":[111],"MPI":[112,143],"runtime.":[113],"Using":[114],"methodology,":[117],"modify":[119],"Caffe's":[120],"workflow":[121],"maximize":[123],"overlap":[125],"with":[130,185],"multi-stage":[131],"propagation":[133],"gradient":[135],"aggregation":[136],"schemes.":[137],"We":[138],"DL-Awareness":[140],"runtime":[144],"proposing":[146],"hierarchical":[148],"reduction":[149],"design":[150],"that":[151,204],"benefits":[152],"from":[153],"CUDA-Aware":[154],"features":[155],"provides":[157],"up":[158,177,206,238],"massive":[161],"133x":[162],"speedup":[163,168,187],"over":[164,169,190,224],"OpenMPI":[165],"2.6x":[167],"MVAPICH2":[170],"160":[172,179,208],"GPUs.":[173,192,209],"S-Caffe":[174,216,236],"successfully":[175],"scales":[176,205],"K-80":[180],"GoogLeNet":[183],"(ImageNet)":[184],"2.5x":[189],"32":[191],"To":[193],"best":[195],"our":[197],"knowledge,":[198],"this":[199],"is":[200,249],"first":[202],"Furthermore,":[210],"even":[211],"node":[214],"training,":[215],"shows":[217],"improvement":[219],"14\\%":[221],"9\\%":[223],"Nvidia's":[225],"optimized":[226],"8":[229],"16":[231],"GPUs,":[232],"respectively.":[233],"addition,":[235],"achieves":[237],"1395":[240],"samples":[241],"per":[242],"second":[243],"AlexNet":[246],"model,":[247],"which":[248],"comparable":[250],"performance":[253],"Microsoft":[255]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":19},{"year":2020,"cited_by_count":21},{"year":2019,"cited_by_count":30},{"year":2018,"cited_by_count":31},{"year":2017,"cited_by_count":12}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2017-02-03T00:00:00"}
