{"id":"https://openalex.org/W2808072032","doi":"https://doi.org/10.1145/3199605","title":"NUMA-Caffe","display_name":"NUMA-Caffe","publication_year":2018,"publication_date":"2018-06-08","ids":{"openalex":"https://openalex.org/W2808072032","doi":"https://doi.org/10.1145/3199605","mag":"2808072032"},"language":"en","primary_location":{"id":"doi:10.1145/3199605","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3199605","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3199605","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3199605","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036385652","display_name":"Probir Roy","orcid":"https://orcid.org/0000-0001-8877-0787"},"institutions":[{"id":"https://openalex.org/I267592682","display_name":"Williams (United States)","ror":"https://ror.org/007zhvp17","country_code":"US","type":"company","lineage":["https://openalex.org/I267592682"]},{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Probir Roy","raw_affiliation_strings":["College of William and Mary, Williamsburg, VA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of William and Mary, Williamsburg, VA","institution_ids":["https://openalex.org/I16285277","https://openalex.org/I267592682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043209884","display_name":"Shuaiwen Leon Song","orcid":"https://orcid.org/0000-0002-8402-1436"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]},{"id":"https://openalex.org/I142606810","display_name":"Pacific Northwest National Laboratory","ror":"https://ror.org/05h992307","country_code":"US","type":"facility","lineage":["https://openalex.org/I1325736334","https://openalex.org/I1330989302","https://openalex.org/I142606810","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shuaiwen Leon Song","raw_affiliation_strings":["Pacific Northwest National Laboratory and College of William and Mary, Richland,WA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Pacific Northwest National Laboratory and College of William and Mary, Richland,WA","institution_ids":["https://openalex.org/I142606810","https://openalex.org/I16285277"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006650430","display_name":"Sriram Krishnamoorthy","orcid":"https://orcid.org/0000-0002-4682-1002"},"institutions":[{"id":"https://openalex.org/I142606810","display_name":"Pacific Northwest National Laboratory","ror":"https://ror.org/05h992307","country_code":"US","type":"facility","lineage":["https://openalex.org/I1325736334","https://openalex.org/I1330989302","https://openalex.org/I142606810","https://openalex.org/I39565521"]},{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sriram Krishnamoorthy","raw_affiliation_strings":["Pacific Northwest National Laboratory and College of William and Mary, Richland,WA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Pacific Northwest National Laboratory and College of William and Mary, Richland,WA","institution_ids":["https://openalex.org/I142606810","https://openalex.org/I16285277"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009322871","display_name":"Abhinav Vishnu","orcid":"https://orcid.org/0000-0002-0593-4780"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]},{"id":"https://openalex.org/I142606810","display_name":"Pacific Northwest National Laboratory","ror":"https://ror.org/05h992307","country_code":"US","type":"facility","lineage":["https://openalex.org/I1325736334","https://openalex.org/I1330989302","https://openalex.org/I142606810","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abhinav Vishnu","raw_affiliation_strings":["Pacific Northwest National Laboratory and College of William and Mary, Richland,WA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Pacific Northwest National Laboratory and College of William and Mary, Richland,WA","institution_ids":["https://openalex.org/I142606810","https://openalex.org/I16285277"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084943434","display_name":"Dipanjan Sengupta","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I881441977","display_name":"Los Angeles Mission College","ror":"https://ror.org/01stcbz02","country_code":"US","type":"education","lineage":["https://openalex.org/I2802998804","https://openalex.org/I881441977"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dipanjan Sengupta","raw_affiliation_strings":["Intel Labs, Mission College Blvd., Santa Clara, CA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Intel Labs, Mission College Blvd., Santa Clara, CA","institution_ids":["https://openalex.org/I881441977","https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100331713","display_name":"Xu Liu","orcid":"https://orcid.org/0000-0002-8984-5485"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]},{"id":"https://openalex.org/I267592682","display_name":"Williams (United States)","ror":"https://ror.org/007zhvp17","country_code":"US","type":"company","lineage":["https://openalex.org/I267592682"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xu Liu","raw_affiliation_strings":["College of William and Mary, Williamsburg, VA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of William and Mary, Williamsburg, VA","institution_ids":["https://openalex.org/I16285277","https://openalex.org/I267592682"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5036385652"],"corresponding_institution_ids":["https://openalex.org/I16285277","https://openalex.org/I267592682"],"apc_list":null,"apc_paid":null,"fwci":1.3783,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.86055393,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"15","issue":"2","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8871399164199829},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.7645798921585083},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6959294676780701},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5900577902793884},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4914504289627075},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.46796780824661255},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.44384193420410156},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.4428296685218811},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4318106770515442},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4119947552680969}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8871399164199829},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.7645798921585083},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6959294676780701},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5900577902793884},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4914504289627075},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.46796780824661255},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.44384193420410156},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.4428296685218811},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4318106770515442},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4119947552680969},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3199605","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3199605","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3199605","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3199605","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3199605","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3199605","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.4099999964237213,"display_name":"Industry, innovation and infrastructure"}],"awards":[{"id":"https://openalex.org/G1899721271","display_name":null,"funder_award_id":"AC05-76RL01830","funder_id":"https://openalex.org/F4320306250","funder_display_name":"Battelle"},{"id":"https://openalex.org/G2506945562","display_name":null,"funder_award_id":"76RL01830","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G2510423872","display_name":null,"funder_award_id":"AC05-76RL01830","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3289159562","display_name":null,"funder_award_id":"No. DE-AC05-76RL01830","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3670379805","display_name":null,"funder_award_id":"1618620","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5200347334","display_name":null,"funder_award_id":"DE-AC05-76RL01830","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G5321382714","display_name":null,"funder_award_id":"76RL01830","funder_id":"https://openalex.org/F4320338354","funder_display_name":"Pacific Northwest National Laboratory"},{"id":"https://openalex.org/G5726405315","display_name":null,"funder_award_id":"DE-AC05","funder_id":"https://openalex.org/F4320306250","funder_display_name":"Battelle"},{"id":"https://openalex.org/G6664334341","display_name":null,"funder_award_id":"DE-AC05-76RL01830","funder_id":"https://openalex.org/F4320306250","funder_display_name":"Battelle"},{"id":"https://openalex.org/G7200503492","display_name":null,"funder_award_id":"DE-AC05-76RL01830","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7560550554","display_name":null,"funder_award_id":"76RL01830","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7995982022","display_name":null,"funder_award_id":"DE-AC05","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320306250","display_name":"Battelle","ror":"https://ror.org/01h5tnr73"},{"id":"https://openalex.org/F4320338354","display_name":"Pacific Northwest National Laboratory","ror":"https://ror.org/05h992307"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2808072032.pdf","grobid_xml":"https://content.openalex.org/works/W2808072032.grobid-xml"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W1442374986","https://openalex.org/W1547840952","https://openalex.org/W1572624911","https://openalex.org/W1598866093","https://openalex.org/W1641327695","https://openalex.org/W1667652561","https://openalex.org/W1788418780","https://openalex.org/W1842336537","https://openalex.org/W1948591071","https://openalex.org/W1969923711","https://openalex.org/W2081612620","https://openalex.org/W2091905905","https://openalex.org/W2093852742","https://openalex.org/W2097117768","https://openalex.org/W2103460560","https://openalex.org/W2105506259","https://openalex.org/W2119391823","https://openalex.org/W2119407198","https://openalex.org/W2120432001","https://openalex.org/W2127941149","https://openalex.org/W2132424367","https://openalex.org/W2132737349","https://openalex.org/W2144839971","https://openalex.org/W2147768505","https://openalex.org/W2152419477","https://openalex.org/W2160815625","https://openalex.org/W2168231600","https://openalex.org/W2181607856","https://openalex.org/W2186615578","https://openalex.org/W2198403777","https://openalex.org/W2204075824","https://openalex.org/W2271840356","https://openalex.org/W2277932597","https://openalex.org/W2294581108","https://openalex.org/W2336650964","https://openalex.org/W2339765813","https://openalex.org/W2414181241","https://openalex.org/W2580537508","https://openalex.org/W2580688187","https://openalex.org/W2587538916","https://openalex.org/W2605019159","https://openalex.org/W2618530766","https://openalex.org/W2622263826","https://openalex.org/W2724382637","https://openalex.org/W2749988060","https://openalex.org/W2950094539","https://openalex.org/W2951781666","https://openalex.org/W2962911728","https://openalex.org/W3118608800"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2499279132","https://openalex.org/W271331623","https://openalex.org/W1974690493","https://openalex.org/W2264746079","https://openalex.org/W3012895752","https://openalex.org/W2007651078","https://openalex.org/W1987160526","https://openalex.org/W3023876411","https://openalex.org/W91363257"],"abstract_inverted_index":{"Convolution":[0],"Neural":[1,10],"Networks":[2,11],"(CNNs),":[3],"a":[4,113,127,188],"special":[5],"subcategory":[6],"of":[7,73,130,169,214],"Deep":[8],"Learning":[9],"(DNNs),":[12],"have":[13,35,75],"become":[14],"increasingly":[15],"popular":[16],"in":[17,25,39,79,138,212],"industry":[18],"and":[19,30,54,66,81,162,178,197,217],"academia":[20],"for":[21,45,61,106,154],"their":[22],"powerful":[23],"capability":[24],"pattern":[26],"classification,":[27],"image":[28],"processing,":[29],"speech":[31],"recognition.":[32],"Recently,":[33],"they":[34],"been":[36],"widely":[37],"adopted":[38],"High":[40],"Performance":[41],"Computing":[42],"(HPC)":[43],"environments":[44],"solving":[46],"complex":[47],"problems":[48],"related":[49],"to":[50,98,126,133,182],"modeling,":[51],"runtime":[52],"prediction,":[53],"big":[55],"data":[56],"analysis.":[57],"Current":[58],"state-of-the-art":[59,209],"designs":[60,211],"DNNs":[62],"on":[63,160,192],"modern":[64,89],"multi-":[65,161,196],"many-core":[67,163,198],"CPU":[68,90,164],"architectures,":[69,199],"such":[70],"as":[71],"variants":[72],"Caffe,":[74],"reported":[76],"promising":[77],"performance":[78],"speedup":[80],"scalability,":[82],"comparable":[83],"with":[84],"the":[85,183,208],"GPU":[86],"implementations.":[87],"However,":[88],"architectures":[91],"employ":[92],"Non-Uniform":[93],"Memory":[94],"Access":[95],"(NUMA)":[96],"technique":[97],"integrate":[99],"multiple":[100],"sockets,":[101],"which":[102],"incurs":[103],"unique":[104],"challenges":[105],"designing":[107],"highly":[108],"efficient":[109],"CNN":[110,149],"frameworks.":[111],"Without":[112],"careful":[114],"design,":[115,150],"DNN":[116,170],"frameworks":[117],"can":[118],"easily":[119],"suffer":[120],"from":[121],"long":[122],"memory":[123,131],"latency":[124],"due":[125],"large":[128],"number":[129],"accesses":[132],"remote":[134],"NUMA":[135],"domains,":[136],"resulting":[137],"poor":[139],"scalability.":[140,218],"To":[141],"address":[142],"this":[143],"challenge,":[144],"we":[145],"propose":[146],"NUMA-aware":[147],"multi-solver-based":[148],"named":[151],"NUMA-Caffe":[152,166,205],",":[153],"accelerating":[155],"deep":[156],"learning":[157],"neural":[158],"networks":[159],"architectures.":[165],"is":[167],"independent":[168],"topology,":[171],"does":[172],"not":[173],"impact":[174],"network":[175],"convergence":[176],"rates,":[177],"provides":[179],"superior":[180],"scalability":[181],"existing":[184],"Caffe":[185,210],"variants.":[186],"Through":[187],"thorough":[189],"empirical":[190],"study":[191],"four":[193],"contemporary":[194],"NUMA-based":[195],"our":[200],"experimental":[201],"results":[202],"demonstrate":[203],"that":[204],"significantly":[206],"outperforms":[207],"terms":[213],"both":[215],"throughput":[216]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":5}],"updated_date":"2026-05-04T08:30:34.212998","created_date":"2018-06-21T00:00:00"}
