{"id":"https://openalex.org/W4413145808","doi":"https://doi.org/10.1109/ipdpsw66978.2025.00061","title":"A Header-Based C++ Library for Computing Hessian on GPU using Automatic Differentiation","display_name":"A Header-Based C++ Library for Computing Hessian on GPU using Automatic Differentiation","publication_year":2025,"publication_date":"2025-06-03","ids":{"openalex":"https://openalex.org/W4413145808","doi":"https://doi.org/10.1109/ipdpsw66978.2025.00061"},"language":"en","primary_location":{"id":"doi:10.1109/ipdpsw66978.2025.00061","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdpsw66978.2025.00061","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102707396","display_name":"Desh Ranjan","orcid":null},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Desh Ranjan","raw_affiliation_strings":["Old Dominion University,Department of Computer Science,Norfolk,Virginia,USA"],"affiliations":[{"raw_affiliation_string":"Old Dominion University,Department of Computer Science,Norfolk,Virginia,USA","institution_ids":["https://openalex.org/I81365321"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101789500","display_name":"Mohammad Zubair","orcid":"https://orcid.org/0000-0002-5449-1779"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohammad Zubair","raw_affiliation_strings":["Old Dominion University,Department of Computer Science,Norfolk,Virginia,USA"],"affiliations":[{"raw_affiliation_string":"Old Dominion University,Department of Computer Science,Norfolk,Virginia,USA","institution_ids":["https://openalex.org/I81365321"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5102707396"],"corresponding_institution_ids":["https://openalex.org/I81365321"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.39575386,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"355","last_page":"364"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.7858999967575073,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.7858999967575073,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.7450000047683716,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13650","display_name":"Computational Physics and Python Applications","score":0.7067999839782715,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/header","display_name":"Header","score":0.9003301858901978},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7073665857315063},{"id":"https://openalex.org/keywords/hessian-matrix","display_name":"Hessian matrix","score":0.6606811881065369},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6349983215332031},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5025362968444824},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.4610535204410553},{"id":"https://openalex.org/keywords/automatic-differentiation","display_name":"Automatic differentiation","score":0.45950716733932495},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.36917558312416077},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3376772999763489},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.163929283618927},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12168318033218384},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.07331836223602295},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.07285594940185547}],"concepts":[{"id":"https://openalex.org/C48105269","wikidata":"https://www.wikidata.org/wiki/Q1141160","display_name":"Header","level":2,"score":0.9003301858901978},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7073665857315063},{"id":"https://openalex.org/C203616005","wikidata":"https://www.wikidata.org/wiki/Q620495","display_name":"Hessian matrix","level":2,"score":0.6606811881065369},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6349983215332031},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5025362968444824},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.4610535204410553},{"id":"https://openalex.org/C133512626","wikidata":"https://www.wikidata.org/wiki/Q787371","display_name":"Automatic differentiation","level":3,"score":0.45950716733932495},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.36917558312416077},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3376772999763489},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.163929283618927},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12168318033218384},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.07331836223602295},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.07285594940185547},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ipdpsw66978.2025.00061","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdpsw66978.2025.00061","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2055550450","https://openalex.org/W2054859890","https://openalex.org/W1545275724","https://openalex.org/W3196095186","https://openalex.org/W3188536927","https://openalex.org/W2029878367","https://openalex.org/W2071825043","https://openalex.org/W2601195525","https://openalex.org/W1977710666","https://openalex.org/W2068150956"],"abstract_inverted_index":{"The":[0,112],"Hessian-vector":[1,24,265,276],"product":[2,266,277],"computation":[3,51,86,98,124],"appears":[4],"in":[5,11,57,221,253],"many":[6,27],"scientific":[7],"applications":[8],"such":[9,78,186],"as":[10,79,141,187,216],"optimization":[12],"and":[13,54,66,152,176,233],"finite":[14],"element":[15],"modeling.":[16],"Often":[17],"there":[18],"is":[19,46,94,117,139,230,270,305],"a":[20,88,126,142,162,170,241,244],"need":[21],"for":[22,150,160,263,267,278,283,297],"computing":[23,298],"products":[25,56,168],"at":[26,69],"data":[28],"points":[29],"concurrently.":[30,111,137],"We":[31,154,209,225],"propose":[32],"an":[33],"automatic":[34],"differentiation":[35],"(AD)":[36],"based":[37],"method,":[38],"CHESSFAD":[39,59,83,120,138,159,193,303],"(Chunked":[40],"HESSian":[41],"using":[42,63,301],"Forward-mode":[43],"AD),":[44],"that":[45,72,147,192,227],"designed":[47],"with":[48,202,238],"efficient":[49],"parallel":[50],"of":[52,87,90,96,99,104,115,125,158,165,172,219,240,243,247,290,293],"Hessian":[53,92,106,127,239,300],"Hessian-Vector":[55,167],"mind.":[58],"computes":[60],"second-order":[61],"derivatives":[62],"forward":[64],"mode":[65],"exposes":[67],"parallelism":[68,116],"different":[70,132],"levels":[71],"can":[73,108,134],"be":[74,109,135],"exploited":[75],"on":[76,169,198,207,214,281],"accelerators":[77],"NVIDIA":[80],"GPUs.":[81,153],"In":[82],"approach,":[84],"the":[85,91,97,105,123,156,217,222,258,264,275,291,299,302],"row":[89,128],"matrix":[93,107],"independent":[95,166],"other":[100,181],"rows.":[101],"Hence":[102],"rows":[103],"computed":[110,136],"second":[113],"level":[114],"exposed":[118],"because":[119],"approach":[121,229,304],"partitions":[122],"into":[129],"chunks,":[130],"where":[131],"chunks":[133],"implemented":[140],"lightweight":[143],"header-based":[144,183],"C++":[145,184],"library":[146],"works":[148],"both":[149],"CPUs":[151],"evaluate":[155],"performance":[157,179],"performing":[161],"large":[163,245],"number":[164,218,246,292],"set":[171],"standard":[173],"test":[174],"functions,":[175],"compare":[177],"its":[178,212],"to":[180,236,273],"existing":[182],"libraries":[185],"autodiff.":[188],"Our":[189],"results":[190],"show":[191],"performs":[194],"better":[195],"than":[196],"autodiff,":[197],"all":[199,284],"these":[200],"functions":[201],"improvement":[203],"ranging":[204],"from":[205],"5-50%":[206],"average.":[208],"also":[210,306],"analyze":[211],"efficiency":[213],"GPUs":[215],"variables":[220,269,280],"function":[223,242],"grows.":[224],"demonstrate":[226],"our":[228],"easily":[231],"parallelizable":[232],"enables":[234],"us":[235],"work":[237],"variables,":[248],"which":[249],"was":[250],"not":[251],"possible":[252],"sequential":[254,259],"implementation.":[255],"For":[256],"example,":[257],"execution":[260],"time":[261],"required":[262],"two":[268],"approximately":[271],"enough":[272],"compute":[274],"16":[279],"GPU":[282],"three":[285],"functions.":[286],"A":[287],"basic":[288],"analysis":[289],"arithmetic":[294],"operations":[295],"needed":[296],"provided.":[307]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
