{"id":"https://openalex.org/W2098077956","doi":"https://doi.org/10.1109/ipdps.2010.5470481","title":"Improving numerical reproducibility and stability in large-scale numerical simulations on GPUs","display_name":"Improving numerical reproducibility and stability in large-scale numerical simulations on GPUs","publication_year":2010,"publication_date":"2010-01-01","ids":{"openalex":"https://openalex.org/W2098077956","doi":"https://doi.org/10.1109/ipdps.2010.5470481","mag":"2098077956"},"language":"en","primary_location":{"id":"doi:10.1109/ipdps.2010.5470481","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2010.5470481","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE International Symposium on Parallel &amp; Distributed Processing (IPDPS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078387820","display_name":"Michela Taufer","orcid":"https://orcid.org/0000-0002-0031-6377"},"institutions":[{"id":"https://openalex.org/I86501945","display_name":"University of Delaware","ror":"https://ror.org/01sbq1a82","country_code":"US","type":"education","lineage":["https://openalex.org/I86501945"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michela Taufer","raw_affiliation_strings":["Department of Computer & Information Sciences, University of Delaware, USA","Department of Computer and Information Sciences, University of Delaware"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer & Information Sciences, University of Delaware, USA","institution_ids":["https://openalex.org/I86501945"]},{"raw_affiliation_string":"Department of Computer and Information Sciences, University of Delaware","institution_ids":["https://openalex.org/I86501945"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050348193","display_name":"Omar Padron","orcid":null},"institutions":[{"id":"https://openalex.org/I47449453","display_name":"Kean University","ror":"https://ror.org/04wzzqn13","country_code":"US","type":"education","lineage":["https://openalex.org/I47449453"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Omar Padron","raw_affiliation_strings":["New Jersey Center of Science, Technology and Mathematics Education, Kean University, USA","[New Jersey Center for Science, Tech., and Math. Education, Kean University]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"New Jersey Center of Science, Technology and Mathematics Education, Kean University, USA","institution_ids":["https://openalex.org/I47449453"]},{"raw_affiliation_string":"[New Jersey Center for Science, Tech., and Math. Education, Kean University]","institution_ids":["https://openalex.org/I47449453"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088650171","display_name":"Philip Saponaro","orcid":"https://orcid.org/0000-0002-9486-463X"},"institutions":[{"id":"https://openalex.org/I86501945","display_name":"University of Delaware","ror":"https://ror.org/01sbq1a82","country_code":"US","type":"education","lineage":["https://openalex.org/I86501945"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Philip Saponaro","raw_affiliation_strings":["Department of Computer & Information Sciences, University of Delaware, USA","Department of Computer and Information Sciences, University of Delaware"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer & Information Sciences, University of Delaware, USA","institution_ids":["https://openalex.org/I86501945"]},{"raw_affiliation_string":"Department of Computer and Information Sciences, University of Delaware","institution_ids":["https://openalex.org/I86501945"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065269204","display_name":"Sandeep Patel","orcid":"https://orcid.org/0000-0002-6136-3556"},"institutions":[{"id":"https://openalex.org/I86501945","display_name":"University of Delaware","ror":"https://ror.org/01sbq1a82","country_code":"US","type":"education","lineage":["https://openalex.org/I86501945"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sandeep Patel","raw_affiliation_strings":["Department of Chemistry, University of Delaware, USA","[Dept. of Chemistry, University of Delaware]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Chemistry, University of Delaware, USA","institution_ids":["https://openalex.org/I86501945"]},{"raw_affiliation_string":"[Dept. of Chemistry, University of Delaware]","institution_ids":["https://openalex.org/I86501945"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.7285,"has_fulltext":false,"cited_by_count":50,"citation_normalized_percentile":{"value":0.93417731,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"5462","issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9761000275611877,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7856619358062744},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6464645862579346},{"id":"https://openalex.org/keywords/double-precision-floating-point-format","display_name":"Double-precision floating-point format","score":0.6449986696243286},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.6077415943145752},{"id":"https://openalex.org/keywords/single-precision-floating-point-format","display_name":"Single-precision floating-point format","score":0.5675501823425293},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5500284433364868},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.5356200933456421},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.52940833568573},{"id":"https://openalex.org/keywords/integrator","display_name":"Integrator","score":0.5274075865745544},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.5189433097839355},{"id":"https://openalex.org/keywords/arbitrary-precision-arithmetic","display_name":"Arbitrary-precision arithmetic","score":0.4846668541431427},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.4719737470149994},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.47128644585609436},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.45094066858291626},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.283439040184021}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7856619358062744},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6464645862579346},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.6449986696243286},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.6077415943145752},{"id":"https://openalex.org/C133095886","wikidata":"https://www.wikidata.org/wiki/Q1307173","display_name":"Single-precision floating-point format","level":3,"score":0.5675501823425293},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5500284433364868},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.5356200933456421},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.52940833568573},{"id":"https://openalex.org/C79518650","wikidata":"https://www.wikidata.org/wiki/Q2081431","display_name":"Integrator","level":3,"score":0.5274075865745544},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.5189433097839355},{"id":"https://openalex.org/C83581934","wikidata":"https://www.wikidata.org/wiki/Q527381","display_name":"Arbitrary-precision arithmetic","level":2,"score":0.4846668541431427},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.4719737470149994},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.47128644585609436},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.45094066858291626},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.283439040184021},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ipdps.2010.5470481","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2010.5470481","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE International Symposium on Parallel &amp; Distributed Processing (IPDPS)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.628.4787","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.628.4787","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://gcl.cis.udel.edu/publications/conferences/010IPDPSmt.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.8799999952316284,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320308633","display_name":"Computing Research Association","ror":"https://ror.org/00agrkd75"},{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W126446084","https://openalex.org/W1596774780","https://openalex.org/W1967777563","https://openalex.org/W1973858832","https://openalex.org/W2058978358","https://openalex.org/W2074519777","https://openalex.org/W2078391824","https://openalex.org/W2079450792","https://openalex.org/W2106191211","https://openalex.org/W2112961862","https://openalex.org/W2145804981","https://openalex.org/W2159497832","https://openalex.org/W2162166182","https://openalex.org/W4231978535","https://openalex.org/W4298846468","https://openalex.org/W6605153215","https://openalex.org/W6635804240"],"related_works":["https://openalex.org/W2116803521","https://openalex.org/W2773283032","https://openalex.org/W3150959508","https://openalex.org/W2239119680","https://openalex.org/W1564887326","https://openalex.org/W1571090276","https://openalex.org/W2185760795","https://openalex.org/W2930605373","https://openalex.org/W2012407419","https://openalex.org/W3215589575"],"abstract_inverted_index":{"The":[0],"advent":[1],"of":[2,29,70,102,109,142,168,175,178,207],"general":[3],"purpose":[4],"graphics":[5],"processing":[6],"units":[7],"(GPGPU's)":[8],"brings":[9],"about":[10],"a":[11,32,176,205],"whole":[12],"new":[13],"platform":[14],"for":[15,43,96],"running":[16],"numerically":[17],"intensive":[18],"applications":[19],"at":[20,67],"high":[21],"speeds.":[22],"Their":[23],"multi-core":[24],"architectures":[25],"enable":[26],"large":[27],"degrees":[28],"parallelism":[30],"via":[31],"massively":[33],"multi-threaded":[34],"environment.":[35],"Molecular":[36],"dynamics":[37],"(MD)":[38],"simulations":[39,80],"are":[40,48,54],"particularly":[41],"well-suited":[42],"GPU's":[44],"because":[45],"their":[46],"computations":[47],"easily":[49],"parallelizable.":[50],"Significant":[51],"performance":[52,65,158],"improvements":[53],"observed":[55],"when":[56,162],"single":[57,128,169],"precision":[58,117,170],"floating-point":[59],"arithmetic":[60,118],"is":[61,73,106,124],"used.":[62],"However,":[63],"this":[64,103,121,136],"comes":[66],"the":[68,84,89,98,107,113,140,164,189,193,202,211],"cost":[69],"accuracy:":[71],"it":[72],"widely":[74],"acknowledged":[75],"that":[76,161,181],"constant-energy":[77],"(NVE)":[78],"MD":[79,152,212],"accumulate":[81],"errors":[82,91],"as":[83,112],"simulation":[85,114],"proceeds":[86],"due":[87],"to":[88,131,145,160,187],"inherent":[90],"associated":[92],"with":[93,204],"integrators":[94],"used":[95],"propagating":[97],"coordinates.":[99],"A":[100],"consequence":[101],"numerical":[104,147],"integration":[105],"drift":[108],"potential":[110],"energy":[111],"proceeds.":[115],"Double":[116],"partially":[119],"corrects":[120],"drifting,":[122],"but":[123],"significantly":[125],"slower":[126],"than":[127],"precision,":[129],"comparable":[130,159],"CPU":[132],"performance.":[133],"To":[134],"address":[135],"problem,":[137],"we":[138],"extend":[139],"approaches":[141],"previous":[143],"literature":[144],"improve":[146],"reproducibility":[148],"and":[149,157,184],"stability":[150],"in":[151],"simulations,":[153],"while":[154],"assuring":[155],"efficiency":[156],"using":[163],"GPU":[165],"hardware":[166],"implementation":[167],"arithmetic.":[171],"We":[172,199],"present":[173],"development":[174],"library":[177,203],"mathematical":[179],"functions":[180],"use":[182],"fast":[183],"efficient":[185],"algorithms":[186],"fix":[188],"error":[190],"produced":[191],"by":[192,197],"equivalent":[194],"operations":[195],"performed":[196],"GPU.":[198],"successfully":[200],"validate":[201],"suite":[206],"synthetic":[208],"codes":[209],"emulating":[210],"behavior":[213],"on":[214],"GPUs.":[215]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":7},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":6},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
