{"id":"https://openalex.org/W3118228302","doi":"https://doi.org/10.1109/hpec43674.2020.9286138","title":"Inference Benchmarking on HPC Systems","display_name":"Inference Benchmarking on HPC Systems","publication_year":2020,"publication_date":"2020-09-22","ids":{"openalex":"https://openalex.org/W3118228302","doi":"https://doi.org/10.1109/hpec43674.2020.9286138","mag":"3118228302"},"language":"en","primary_location":{"id":"doi:10.1109/hpec43674.2020.9286138","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec43674.2020.9286138","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018102608","display_name":"Wesley Brewer","orcid":"https://orcid.org/0000-0002-3639-3956"},"institutions":[{"id":"https://openalex.org/I4210101542","display_name":"Benchmark Research (United States)","ror":"https://ror.org/0185yec04","country_code":"US","type":"company","lineage":["https://openalex.org/I4210101542"]},{"id":"https://openalex.org/I87303767","display_name":"U.S. Army Engineer Research and Development Center","ror":"https://ror.org/027mhn368","country_code":"US","type":"facility","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1306490931","https://openalex.org/I1330347796","https://openalex.org/I87303767"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wesley Brewer","raw_affiliation_strings":["DoD HPCMP PET / Benchmarking Engineer Research & Development Center, Vicksburg, MS"],"affiliations":[{"raw_affiliation_string":"DoD HPCMP PET / Benchmarking Engineer Research & Development Center, Vicksburg, MS","institution_ids":["https://openalex.org/I4210101542","https://openalex.org/I87303767"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026067378","display_name":"Greg Behm","orcid":null},"institutions":[{"id":"https://openalex.org/I4210101542","display_name":"Benchmark Research (United States)","ror":"https://ror.org/0185yec04","country_code":"US","type":"company","lineage":["https://openalex.org/I4210101542"]},{"id":"https://openalex.org/I87303767","display_name":"U.S. Army Engineer Research and Development Center","ror":"https://ror.org/027mhn368","country_code":"US","type":"facility","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1306490931","https://openalex.org/I1330347796","https://openalex.org/I87303767"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Greg Behm","raw_affiliation_strings":["DoD HPCMP PET / Benchmarking Engineer Research & Development Center, Vicksburg, MS"],"affiliations":[{"raw_affiliation_string":"DoD HPCMP PET / Benchmarking Engineer Research & Development Center, Vicksburg, MS","institution_ids":["https://openalex.org/I4210101542","https://openalex.org/I87303767"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070257223","display_name":"Alan L. Scheinine","orcid":null},"institutions":[{"id":"https://openalex.org/I4210101542","display_name":"Benchmark Research (United States)","ror":"https://ror.org/0185yec04","country_code":"US","type":"company","lineage":["https://openalex.org/I4210101542"]},{"id":"https://openalex.org/I87303767","display_name":"U.S. Army Engineer Research and Development Center","ror":"https://ror.org/027mhn368","country_code":"US","type":"facility","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1306490931","https://openalex.org/I1330347796","https://openalex.org/I87303767"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alan Scheinine","raw_affiliation_strings":["DoD HPCMP PET / Benchmarking Engineer Research & Development Center, Vicksburg, MS"],"affiliations":[{"raw_affiliation_string":"DoD HPCMP PET / Benchmarking Engineer Research & Development Center, Vicksburg, MS","institution_ids":["https://openalex.org/I4210101542","https://openalex.org/I87303767"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110862717","display_name":"Ben Parsons","orcid":null},"institutions":[{"id":"https://openalex.org/I87303767","display_name":"U.S. Army Engineer Research and Development Center","ror":"https://ror.org/027mhn368","country_code":"US","type":"facility","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1306490931","https://openalex.org/I1330347796","https://openalex.org/I87303767"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ben Parsons","raw_affiliation_strings":["DoD HPCMP, Engineer Research & Development Center, Vicksburg, MS"],"affiliations":[{"raw_affiliation_string":"DoD HPCMP, Engineer Research & Development Center, Vicksburg, MS","institution_ids":["https://openalex.org/I87303767"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090630635","display_name":"Wesley Emeneker","orcid":null},"institutions":[{"id":"https://openalex.org/I188733180","display_name":"Pacific Disaster Center","ror":"https://ror.org/054g76q14","country_code":"US","type":"government","lineage":["https://openalex.org/I188733180"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wesley Emeneker","raw_affiliation_strings":["Machine Learning Group, Maui HPC Center, Kihei, HI"],"affiliations":[{"raw_affiliation_string":"Machine Learning Group, Maui HPC Center, Kihei, HI","institution_ids":["https://openalex.org/I188733180"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111940529","display_name":"Robert P. Trevi\u00f1o","orcid":null},"institutions":[{"id":"https://openalex.org/I188733180","display_name":"Pacific Disaster Center","ror":"https://ror.org/054g76q14","country_code":"US","type":"government","lineage":["https://openalex.org/I188733180"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Robert P. Trevino","raw_affiliation_strings":["Machine Learning Group, Maui HPC Center, Kihei, HI"],"affiliations":[{"raw_affiliation_string":"Machine Learning Group, Maui HPC Center, Kihei, HI","institution_ids":["https://openalex.org/I188733180"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5018102608"],"corresponding_institution_ids":["https://openalex.org/I4210101542","https://openalex.org/I87303767"],"apc_list":null,"apc_paid":null,"fwci":0.4885,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.67069974,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.870937168598175},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7868728637695312},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7256166934967041},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6681777834892273},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6616884469985962},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5575957298278809},{"id":"https://openalex.org/keywords/concurrency","display_name":"Concurrency","score":0.5237638354301453},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4912874102592468},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.46563875675201416},{"id":"https://openalex.org/keywords/node","display_name":"Node (physics)","score":0.4461129307746887},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4298161268234253},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.4228758215904236},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3478063941001892},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.33883345127105713},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2636662423610687},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18986353278160095}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.870937168598175},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7868728637695312},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7256166934967041},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6681777834892273},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6616884469985962},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5575957298278809},{"id":"https://openalex.org/C193702766","wikidata":"https://www.wikidata.org/wiki/Q1414548","display_name":"Concurrency","level":2,"score":0.5237638354301453},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4912874102592468},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.46563875675201416},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.4461129307746887},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4298161268234253},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.4228758215904236},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3478063941001892},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.33883345127105713},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2636662423610687},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18986353278160095},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec43674.2020.9286138","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec43674.2020.9286138","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.8999999761581421}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2168231600","https://openalex.org/W2194775991","https://openalex.org/W2259324379","https://openalex.org/W2493744448","https://openalex.org/W2604319603","https://openalex.org/W2617766261","https://openalex.org/W2622263826","https://openalex.org/W2787998955","https://openalex.org/W2790501674","https://openalex.org/W2928072194","https://openalex.org/W2962950660","https://openalex.org/W2964004663","https://openalex.org/W2964330541","https://openalex.org/W2969335882","https://openalex.org/W2970959587","https://openalex.org/W2971624117","https://openalex.org/W2972087877","https://openalex.org/W2985232804","https://openalex.org/W3043571714","https://openalex.org/W3099432326","https://openalex.org/W3117082117","https://openalex.org/W4301239768","https://openalex.org/W6684859321","https://openalex.org/W6738460352","https://openalex.org/W6788592496"],"related_works":["https://openalex.org/W56807374","https://openalex.org/W1800827217","https://openalex.org/W3117082117","https://openalex.org/W1777209000","https://openalex.org/W2045565095","https://openalex.org/W2167006593","https://openalex.org/W2039333207","https://openalex.org/W2592623645","https://openalex.org/W1501209654","https://openalex.org/W2146887761"],"abstract_inverted_index":{"As":[0],"deep":[1],"learning":[2],"on":[3,57,67,72,84],"edge":[4],"computing":[5],"systems":[6,56],"has":[7,20,32],"become":[8,21],"more":[9],"prevalent,":[10],"investigation":[11],"of":[12,39,54,90,172,177],"architectures":[13,141],"and":[14,41,78,100,106,179],"configurations":[15,185],"for":[16,25,43,103,174],"optimal":[17,184],"inference":[18,70,108],"performance":[19,45,53,71,102],"a":[22,88,143],"critical":[23],"step":[24],"proposed":[26],"artificial":[27],"intelligence":[28],"solutions.":[29],"While":[30],"there":[31,47],"been":[33],"considerable":[34,144],"work":[35],"in":[36,87],"the":[37,52,68],"development":[38],"hardware":[40],"software":[42],"high":[44],"inferencing,":[46],"is":[48],"little":[49],"known":[50],"about":[51],"such":[55,161],"HPC":[58,73,91,157],"architectures.":[59],"In":[60,110],"this":[61],"paper,":[62],"we":[63,116,168],"address":[64],"outstanding":[65],"questions":[66],"parallel":[69],"systems.":[74],"We":[75,93,136],"report":[76,183],"results":[77],"recommendations":[79],"derived":[80],"from":[81],"evaluating":[82],"iBench":[83],"multiple":[85,134],"platforms":[86],"variety":[89],"configurations.":[92],"systematically":[94],"benchmark":[95],"single-GPU":[96],"performance,":[97,99],"single-node":[98],"multi-node":[101],"maximum":[104],"client-side":[105],"server-side":[107],"throughput.":[109],"order":[111],"to":[112,127,150,182],"achieve":[113],"linear":[114],"speedup,":[115],"show":[117,137],"that":[118,138,148,159,186],"concurrent":[119],"sending":[120,128],"clients":[121],"must":[122],"be":[123,151],"used,":[124],"as":[125,162],"opposed":[126],"large":[129],"batch":[130,180],"payloads":[131],"parallelized":[132],"across":[133],"GPUs.":[135],"client/server":[139],"inferencing":[140],"add":[142],"data":[145],"transfer":[146],"component":[147],"needs":[149],"taken":[152],"into":[153],"consideration":[154],"when":[155],"benchmarking":[156],"system":[158],"benchmarks":[160],"MLPerf":[163],"do":[164],"not":[165],"measure.":[166],"Finally,":[167],"investigate":[169],"energy":[170],"efficiency":[171],"GPUs":[173],"different":[175],"levels":[176],"concurrency":[178],"sizes":[181],"minimize":[187],"cost":[188],"per":[189],"inference.":[190]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
