{"id":"https://openalex.org/W2884775196","doi":"https://doi.org/10.1109/cahpc.2018.8645881","title":"An Argument in Favor of Strong Scaling for Deep Neural Networks with Small Datasets","display_name":"An Argument in Favor of Strong Scaling for Deep Neural Networks with Small Datasets","publication_year":2018,"publication_date":"2018-09-01","ids":{"openalex":"https://openalex.org/W2884775196","doi":"https://doi.org/10.1109/cahpc.2018.8645881","mag":"2884775196"},"language":"en","primary_location":{"id":"doi:10.1109/cahpc.2018.8645881","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cahpc.2018.8645881","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 30th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1807.09161","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Renato L. de F. Cunha","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Renato L. de F. Cunha","raw_affiliation_strings":["IBM Research"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Eduardo R. Rodrigues","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Eduardo R. Rodrigues","raw_affiliation_strings":["IBM Research"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Matheus Palhares Viana","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matheus Palhares Viana","raw_affiliation_strings":["IBM Research"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]}]},{"author_position":"last","author":{"id":null,"display_name":"Dario Augusto Borges Oliveira","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dario Augusto Borges Oliveira","raw_affiliation_strings":["IBM Research"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.08140975,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"78","issue":null,"first_page":"306","last_page":"313"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7197999954223633},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.6875},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.6338000297546387},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6029999852180481},{"id":"https://openalex.org/keywords/argument","display_name":"Argument (complex analysis)","score":0.5497000217437744},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5224999785423279},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5152000188827515}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7796000242233276},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7197999954223633},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.6875},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.6338000297546387},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6140000224113464},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6029999852180481},{"id":"https://openalex.org/C98184364","wikidata":"https://www.wikidata.org/wiki/Q1780131","display_name":"Argument (complex analysis)","level":2,"score":0.5497000217437744},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5472000241279602},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5224999785423279},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5152000188827515},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.47029998898506165},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.38940000534057617},{"id":"https://openalex.org/C2781002164","wikidata":"https://www.wikidata.org/wiki/Q6822311","display_name":"Meta learning (computer science)","level":3,"score":0.31470000743865967},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2957000136375427},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2874000072479248},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2854999899864197},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2621999979019165}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/cahpc.2018.8645881","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cahpc.2018.8645881","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 30th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1807.09161","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1807.09161","pdf_url":"https://arxiv.org/pdf/1807.09161","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1807.09161","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1807.09161","pdf_url":"https://arxiv.org/pdf/1807.09161","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1979566015","https://openalex.org/W1988115241","https://openalex.org/W2041823554","https://openalex.org/W2051567790","https://openalex.org/W2145339207","https://openalex.org/W2533800772","https://openalex.org/W2875583934","https://openalex.org/W2919115771","https://openalex.org/W6620707391","https://openalex.org/W6628377381","https://openalex.org/W6684191040","https://openalex.org/W6713134421"],"related_works":[],"abstract_inverted_index":{"In":[0,85,104,126],"recent":[1,115],"years,":[2],"with":[3,100,147],"the":[4,38,83,111,162,166,186],"popularization":[5],"of":[6,132,175,189],"deep":[7,60],"learning":[8,61,88],"frameworks":[9],"and":[10,152],"large":[11,101,124],"datasets,":[12],"researchers":[13,66],"have":[14,91,161],"started":[15],"parallelizing":[16],"their":[17,42],"models":[18,62,121],"in":[19,34,75,130,140],"order":[20,35],"to":[21,36,58,82,94,120,149,172],"train":[22],"faster.":[23],"This":[24,44],"is":[25,46,67],"crucially":[26],"important,":[27],"because":[28],"they":[29],"typically":[30],"explore":[31],"many":[32,65],"hyperparameters":[33],"find":[37],"best":[39],"ones":[40],"for":[41,136],"applications.":[43],"process":[45],"time":[47],"consuming":[48],"and,":[49],"consequently,":[50],"speeding":[51],"up":[52,148],"training":[53],"improves":[54],"productivity.":[55],"One":[56],"approach":[57,146],"parallelize":[59],"followed":[63],"by":[64,114],"based":[68],"on":[69],"weak":[70,155],"scaling.":[71],"The":[72],"minibatches":[73],"increase":[74],"size":[76],"as":[77,165],"new":[78,87],"GPUs":[79,151],"are":[80],"added":[81],"system.":[84],"addition,":[86],"rates":[89],"schedules":[90],"been":[92],"proposed":[93],"fix":[95],"optimization":[96],"issues":[97],"that":[98,110,122,154],"occur":[99],"minibatch":[102],"sizes.":[103],"this":[105],"paper,":[106],"however,":[107],"we":[108,128],"show":[109,153],"recommendations":[112],"provided":[113],"work":[116],"do":[117],"not":[118,157,160],"apply":[119],"lack":[123],"datasets.":[125],"fact,":[127],"argument":[129],"favor":[131],"using":[133],"strong":[134,178],"scaling":[135,156,179],"achieving":[137],"reliable":[138],"performance":[139],"such":[141],"cases.":[142],"We":[143],"evaluated":[144],"our":[145],"32":[150],"only":[158],"does":[159],"same":[163,187],"accuracy":[164,188],"sequential":[167,191],"model,":[168],"it":[169],"also":[170],"fails":[171],"converge":[173],"most":[174],"time.":[176],"Meanwhile,":[177],"has":[180],"good":[181],"scalability":[182],"while":[183],"having":[184],"exactly":[185],"a":[190],"implementation.":[192]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2018-08-03T00:00:00"}
