{"id":"https://openalex.org/W2916187335","doi":"https://doi.org/10.1109/cahpc.2018.8645860","title":"Deep Learning on Large-Scale Muticore Clusters","display_name":"Deep Learning on Large-Scale Muticore Clusters","publication_year":2018,"publication_date":"2018-09-01","ids":{"openalex":"https://openalex.org/W2916187335","doi":"https://doi.org/10.1109/cahpc.2018.8645860","mag":"2916187335"},"language":"en","primary_location":{"id":"doi:10.1109/cahpc.2018.8645860","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cahpc.2018.8645860","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 30th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044305771","display_name":"Kazumasa Sakivama","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Kazumasa Sakivama","raw_affiliation_strings":["The University of Tokyo"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101496340","display_name":"Shinpei Kato","orcid":"https://orcid.org/0000-0003-1782-5319"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shinpei Kato","raw_affiliation_strings":["The University of Tokyo"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081333482","display_name":"Y. Ishikawa","orcid":null},"institutions":[{"id":"https://openalex.org/I4210110652","display_name":"RIKEN","ror":"https://ror.org/01sjwvz98","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yutaka Ishikawa","raw_affiliation_strings":["RIKEN"],"affiliations":[{"raw_affiliation_string":"RIKEN","institution_ids":["https://openalex.org/I4210110652"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102836031","display_name":"Atsushi Hori","orcid":"https://orcid.org/0000-0002-7010-8098"},"institutions":[{"id":"https://openalex.org/I4210110652","display_name":"RIKEN","ror":"https://ror.org/01sjwvz98","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Atsushi Hori","raw_affiliation_strings":["RIKEN"],"affiliations":[{"raw_affiliation_string":"RIKEN","institution_ids":["https://openalex.org/I4210110652"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036646687","display_name":"Abraham Monrroy","orcid":"https://orcid.org/0000-0002-3282-9126"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Abraham Monrroy","raw_affiliation_strings":["Nagoya University"],"affiliations":[{"raw_affiliation_string":"Nagoya University","institution_ids":["https://openalex.org/I60134161"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5044305771"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.1045,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.49121525,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"314","last_page":"321"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.9434397220611572},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8630968928337097},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7046039700508118},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.70096755027771},{"id":"https://openalex.org/keywords/node","display_name":"Node (physics)","score":0.6611328125},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.6315805315971375},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.5846908688545227},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5841662287712097},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45779284834861755},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.44383135437965393},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.41389769315719604},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.33101120591163635}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.9434397220611572},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8630968928337097},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7046039700508118},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.70096755027771},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.6611328125},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.6315805315971375},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.5846908688545227},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5841662287712097},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45779284834861755},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.44383135437965393},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.41389769315719604},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.33101120591163635},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cahpc.2018.8645860","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cahpc.2018.8645860","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 30th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1510543252","https://openalex.org/W1530262073","https://openalex.org/W1598866093","https://openalex.org/W1970456555","https://openalex.org/W2097117768","https://openalex.org/W2113929681","https://openalex.org/W2116360511","https://openalex.org/W2154987621","https://openalex.org/W2155893237","https://openalex.org/W2156163116","https://openalex.org/W2163605009","https://openalex.org/W2188183693","https://openalex.org/W2194775991","https://openalex.org/W2580688187","https://openalex.org/W2622263826","https://openalex.org/W2740001873","https://openalex.org/W2749988060","https://openalex.org/W2962911728","https://openalex.org/W4246367117","https://openalex.org/W4293318097","https://openalex.org/W6631660994","https://openalex.org/W6635810480","https://openalex.org/W6684191040","https://openalex.org/W6739622702","https://openalex.org/W6743289643","https://openalex.org/W6765060121"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2146343568","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W3157543420","https://openalex.org/W3012895752","https://openalex.org/W2964954556","https://openalex.org/W3023876411","https://openalex.org/W123152114"],"abstract_inverted_index":{"Convolutional":[0],"neural":[1],"networks":[2],"(CNNs)":[3],"have":[4,15,49],"achieved":[5,117],"outstanding":[6],"accuracy":[7],"among":[8],"conventional":[9,132],"machine":[10],"learning":[11],"algorithms.":[12],"Recent":[13],"works":[14],"shown":[16],"that":[17,105],"large":[18],"and":[19,59,81,101,134],"complicated":[20],"models,":[21],"which":[22],"take":[23],"significant":[24],"cost":[25],"for":[26,47,61,87],"training":[27,71],"are":[28,55,63],"needed":[29],"to":[30,131,143],"get":[31],"higher":[32],"accuracy.":[33],"To":[34],"train":[35],"these":[36],"models":[37],"efficiently":[38],"in":[39,123],"high":[40],"performance":[41,72],"computers":[42],"(HPCs),":[43],"many":[44],"parallelization":[45,86],"techniques":[46,54,83],"CNNs":[48,107],"been":[50],"developed.":[51],"However,":[52],"most":[53,119],"mainly":[56],"targeting":[57],"GPUs":[58],"parallelizations":[60],"CPUs":[62],"not":[64],"fully":[65],"investigated.":[66],"This":[67],"paper":[68],"explores":[69],"CNN":[70],"on":[73,93],"large-scale":[74],"multicore":[75],"clusters":[76],"by":[77,111],"optimizing":[78],"intra-node":[79],"processing":[80],"applying":[82],"of":[84],"inter-node":[85],"multiple":[88],"GPUs.":[89],"Detailed":[90],"experiments":[91],"conducted":[92],"state-of-the-art":[94],"multi-core":[95],"processors":[96],"using":[97,112],"the":[98],"openMP":[99],"API":[100],"MPI":[102],"framework":[103],"demonstrated":[104],"Caffe-based":[106],"can":[108],"be":[109],"accelerated":[110],"well-designed":[113],"multithreaded":[114],"programs.":[115],"We":[116],"at":[118],"1.64":[120],"times":[121,137],"speedup":[122,138],"convolution":[124],"operations":[125],"with":[126,139],"devised":[127],"lowering":[128,133],"strategy":[129],"compared":[130,142],"acquired":[135],"772":[136],"864":[140],"nodes":[141],"one":[144],"node.":[145]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-01-13T01:12:25.745995","created_date":"2025-10-10T00:00:00"}
