{"id":"https://openalex.org/W1982638274","doi":"https://doi.org/10.1145/2063384.2063477","title":"High-performance lattice QCD for multi-core based parallel systems using a cache-friendly hybrid threaded-MPI approach","display_name":"High-performance lattice QCD for multi-core based parallel systems using a cache-friendly hybrid threaded-MPI approach","publication_year":2011,"publication_date":"2011-11-08","ids":{"openalex":"https://openalex.org/W1982638274","doi":"https://doi.org/10.1145/2063384.2063477","mag":"1982638274"},"language":"en","primary_location":{"id":"doi:10.1145/2063384.2063477","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2063384.2063477","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007892541","display_name":"Mikhail Smelyanskiy","orcid":"https://orcid.org/0000-0002-2433-6110"},"institutions":[{"id":"https://openalex.org/I4210158342","display_name":"Intel (United Kingdom)","ror":"https://ror.org/058cxws58","country_code":"GB","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210158342"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Mikhail Smelyanskiy","raw_affiliation_strings":["Parallel Computing Labs, Intel"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Labs, Intel","institution_ids":["https://openalex.org/I4210158342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110182945","display_name":"Karthikeyan Vaidyanathan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210158342","display_name":"Intel (United Kingdom)","ror":"https://ror.org/058cxws58","country_code":"GB","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210158342"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Karthikeyan Vaidyanathan","raw_affiliation_strings":["Parallel Computing Labs, Intel"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Labs, Intel","institution_ids":["https://openalex.org/I4210158342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090972683","display_name":"Jee Choi","orcid":"https://orcid.org/0000-0002-6938-8221"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jee Choi","raw_affiliation_strings":["Georgia Institute of Technology","[Georgia Institute of Technology.]"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"[Georgia Institute of Technology.]","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038029051","display_name":"B\u00e1lint Jo\u00f3","orcid":"https://orcid.org/0000-0002-4229-7960"},"institutions":[{"id":"https://openalex.org/I29801172","display_name":"Thomas Jefferson National Accelerator Facility","ror":"https://ror.org/02vwzrd76","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I29801172","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"B\u00e1lint Jo\u00f3","raw_affiliation_strings":["Thomas Jefferson National Accelerator","Thomas Jefferson National Accelerator Facility"],"affiliations":[{"raw_affiliation_string":"Thomas Jefferson National Accelerator","institution_ids":["https://openalex.org/I29801172"]},{"raw_affiliation_string":"Thomas Jefferson National Accelerator Facility","institution_ids":["https://openalex.org/I29801172"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059686266","display_name":"Jatin Chhugani","orcid":null},"institutions":[{"id":"https://openalex.org/I4210158342","display_name":"Intel (United Kingdom)","ror":"https://ror.org/058cxws58","country_code":"GB","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210158342"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jatin Chhugani","raw_affiliation_strings":["Parallel Computing Labs, Intel"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Labs, Intel","institution_ids":["https://openalex.org/I4210158342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046199609","display_name":"M. A. Clark","orcid":"https://orcid.org/0000-0001-5211-2002"},"institutions":[{"id":"https://openalex.org/I4210124175","display_name":"Center for Astrophysics Harvard & Smithsonian","ror":"https://ror.org/03c3r2d17","country_code":"US","type":"education","lineage":["https://openalex.org/I103187081","https://openalex.org/I136199984","https://openalex.org/I4210124175"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael A. Clark","raw_affiliation_strings":["Harvard-Smithsonian Center for Astrophysics"],"affiliations":[{"raw_affiliation_string":"Harvard-Smithsonian Center for Astrophysics","institution_ids":["https://openalex.org/I4210124175"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032238070","display_name":"Pradeep Dubey","orcid":"https://orcid.org/0000-0001-5853-0619"},"institutions":[{"id":"https://openalex.org/I4210158342","display_name":"Intel (United Kingdom)","ror":"https://ror.org/058cxws58","country_code":"GB","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210158342"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Pradeep Dubey","raw_affiliation_strings":["Parallel Computing Labs, Intel"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Labs, Intel","institution_ids":["https://openalex.org/I4210158342"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5007892541"],"corresponding_institution_ids":["https://openalex.org/I4210158342"],"apc_list":null,"apc_paid":null,"fwci":2.0241,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86946364,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10224","display_name":"Quantum Chromodynamics and Particle Interactions","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/3106","display_name":"Nuclear and High Energy Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7744798064231873},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7003927230834961},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.6844518780708313},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.44609713554382324},{"id":"https://openalex.org/keywords/many-core","display_name":"Many core","score":0.42886096239089966},{"id":"https://openalex.org/keywords/lattice","display_name":"Lattice (music)","score":0.4159030318260193},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.34155505895614624},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.16588479280471802}],"concepts":[{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7744798064231873},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7003927230834961},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.6844518780708313},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.44609713554382324},{"id":"https://openalex.org/C3020431745","wikidata":"https://www.wikidata.org/wiki/Q25325220","display_name":"Many core","level":2,"score":0.42886096239089966},{"id":"https://openalex.org/C2781204021","wikidata":"https://www.wikidata.org/wiki/Q6497091","display_name":"Lattice (music)","level":2,"score":0.4159030318260193},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.34155505895614624},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.16588479280471802},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2063384.2063477","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2063384.2063477","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.47999998927116394}],"awards":[{"id":"https://openalex.org/G1628882529","display_name":null,"funder_award_id":"DE-FC02-06ER41440DE-FC02-06ER41449DE-AC05-06OR23177","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G4741242520","display_name":null,"funder_award_id":"OCI-1060067","funder_id":"https://openalex.org/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320337377","display_name":"Office of Advanced Cyberinfrastructure","ror":"https://ror.org/04nh1dc89"},{"id":"https://openalex.org/F4320338382","display_name":"Thomas Jefferson National Accelerator Facility","ror":"https://ror.org/02vwzrd76"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W31923072","https://openalex.org/W97717844","https://openalex.org/W1498272138","https://openalex.org/W1575350781","https://openalex.org/W1615591201","https://openalex.org/W1660956061","https://openalex.org/W1968025120","https://openalex.org/W1980537345","https://openalex.org/W2039378765","https://openalex.org/W2041827155","https://openalex.org/W2059531889","https://openalex.org/W2065489259","https://openalex.org/W2090058354","https://openalex.org/W2111960802","https://openalex.org/W2121449077","https://openalex.org/W2124904639","https://openalex.org/W2141170493","https://openalex.org/W2141795111","https://openalex.org/W2147743629","https://openalex.org/W2155216327","https://openalex.org/W2155967869","https://openalex.org/W2159937165","https://openalex.org/W2169201109","https://openalex.org/W2295191825","https://openalex.org/W2316564661","https://openalex.org/W2790573981","https://openalex.org/W2963942559","https://openalex.org/W3097409724","https://openalex.org/W3101208208","https://openalex.org/W3121823746","https://openalex.org/W3204390564","https://openalex.org/W4234290723","https://openalex.org/W4252458306","https://openalex.org/W6629666142","https://openalex.org/W6748936291","https://openalex.org/W6771421596"],"related_works":["https://openalex.org/W4248999141","https://openalex.org/W4255057712","https://openalex.org/W2126398188","https://openalex.org/W4251458280","https://openalex.org/W2512412909","https://openalex.org/W1547865754","https://openalex.org/W2116570023","https://openalex.org/W4210605172","https://openalex.org/W4245707462","https://openalex.org/W4243132314"],"abstract_inverted_index":{"Lattice":[0],"Quantum":[1],"Chromo-dynamics":[2],"(LQCD)":[3],"is":[4,26,85,113],"a":[5,27,38,89,137,174,189,201],"computationally":[6],"challenging":[7],"problem":[8],"that":[9,64],"solves":[10],"the":[11,16,32,44,67,74,93,100,118,152,197],"discretized":[12],"Dirac":[13],"equation":[14],"in":[15,66,110],"presence":[17],"of":[18,43,107,144,170,176],"an":[19,53],"SU(3)":[20],"gauge":[21],"field.":[22],"Its":[23],"key":[24],"operation":[25],"matrix-vector":[28],"product,":[29],"known":[30],"as":[31,151],"Dslash":[33],"operator.":[34],"We":[35,164],"have":[36],"developed":[37],"novel":[39,105],"multicore":[40],"architecture-friendly":[41],"implementation":[42,91,106],"Wilson-Dslash":[45,205],"operator":[46],"which":[47],"delivers":[48],"75":[49],"Gflops":[50],"(single-precision)":[51],"on":[52,99,115],"Intel\u00ae":[54],"Xeon\u00ae":[55],"Processor":[56],"X5680":[57],"achieving":[58],"60%":[59],"computational":[60],"efficiency":[61],"for":[62,173],"datasets":[63,71],"fit":[65],"last-level":[68,75],"cache.":[69],"For":[70,196],"larger":[72],"than":[73,88],"cache,":[76],"this":[77,111],"performance":[78,84,142],"drops":[79],"to":[80,162,188],"50":[81],"Gflops.":[82],"Our":[83],"2-3X":[86],"higher":[87],"well-known":[90],"from":[92],"Chroma":[94],"software":[95],"suite":[96],"when":[97,186],"running":[98],"same":[101,198],"hardware":[102],"platform.":[103],"The":[104,141],"LQCD":[108,131],"reported":[109],"paper":[112],"based":[114],"recently":[116],"published":[117],"3.5D":[119],"spatial":[120],"and":[121,157],"4.5D":[122],"temporal":[123],"tiling":[124],"schemes.":[125],"Both":[126],"blocking":[127],"schemes":[128,146],"significantly":[129],"reduce":[130],"external":[132,158],"memory":[133,159],"bandwidth":[134,160],"requirements,":[135],"delivering":[136],"more":[138,149],"compute-bound":[139],"implementation.":[140],"advantage":[143],"our":[145,171],"will":[147],"become":[148],"significant":[150],"gap":[153],"between":[154],"compute":[155],"flops":[156],"continues":[161],"grow.":[163],"demonstrate":[165],"very":[166],"good":[167],"cluster-level":[168],"scalability":[169],"implementation:":[172],"lattice":[175,199],"323":[177],"x":[178],"256":[179],"sites,":[180],"we":[181],"achieve":[182],"over":[183],"4":[184],"Tflops":[185],"strong-scaled":[187],"128":[190],"node":[191],"system":[192],"(1536":[193],"cores":[194],"total).":[195],"size,":[200],"full":[202],"Conjugate":[203],"Gradients":[204],"operator,":[206],"achieves":[207],"2.95":[208],"Tflops.":[209]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
