{"id":"https://openalex.org/W4416004167","doi":"https://doi.org/10.1145/3731599.3767437","title":"Architecting Tensor Core-Based Reductions for Irregular Molecular Docking Kernels","display_name":"Architecting Tensor Core-Based Reductions for Irregular Molecular Docking Kernels","publication_year":2025,"publication_date":"2025-11-07","ids":{"openalex":"https://openalex.org/W4416004167","doi":"https://doi.org/10.1145/3731599.3767437"},"language":null,"primary_location":{"id":"doi:10.1145/3731599.3767437","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3731599.3767437","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037721262","display_name":"Leonardo Solis-Vasquez","orcid":"https://orcid.org/0000-0001-6896-9879"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Leonardo Solis-Vasquez","raw_affiliation_strings":["Technical University of Darmstadt, Darmstadt, Germany"],"raw_orcid":"https://orcid.org/0000-0001-6896-9879","affiliations":[{"raw_affiliation_string":"Technical University of Darmstadt, Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043900157","display_name":"Andreas F. Tillack","orcid":"https://orcid.org/0000-0002-1832-3030"},"institutions":[{"id":"https://openalex.org/I123431417","display_name":"Scripps Research Institute","ror":"https://ror.org/02dxx6824","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I123431417"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andreas F. Tillack","raw_affiliation_strings":["Scripps Research, La Jolla, USA"],"raw_orcid":"https://orcid.org/0000-0002-1832-3030","affiliations":[{"raw_affiliation_string":"Scripps Research, La Jolla, USA","institution_ids":["https://openalex.org/I123431417"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072196443","display_name":"Diogo Santos\u2010Martins","orcid":"https://orcid.org/0000-0003-4622-3747"},"institutions":[{"id":"https://openalex.org/I123431417","display_name":"Scripps Research Institute","ror":"https://ror.org/02dxx6824","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I123431417"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Diogo Santos-Martins","raw_affiliation_strings":["Scripps Research, La Jolla, USA"],"raw_orcid":"https://orcid.org/0000-0003-4622-3747","affiliations":[{"raw_affiliation_string":"Scripps Research, La Jolla, USA","institution_ids":["https://openalex.org/I123431417"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047000315","display_name":"Andreas Koch","orcid":"https://orcid.org/0000-0002-1164-3082"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Andreas Koch","raw_affiliation_strings":["Technical University of Darmstadt, Darmstadt, Germany"],"raw_orcid":"https://orcid.org/0000-0002-1164-3082","affiliations":[{"raw_affiliation_string":"Technical University of Darmstadt, Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068867875","display_name":"Stefano Forli","orcid":"https://orcid.org/0000-0002-5964-7111"},"institutions":[{"id":"https://openalex.org/I123431417","display_name":"Scripps Research Institute","ror":"https://ror.org/02dxx6824","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I123431417"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stefano Forli","raw_affiliation_strings":["Scripps Research, La Jolla, USA"],"raw_orcid":"https://orcid.org/0000-0002-5964-7111","affiliations":[{"raw_affiliation_string":"Scripps Research, La Jolla, USA","institution_ids":["https://openalex.org/I123431417"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.32140315,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"793","last_page":"803"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.45410001277923584,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.45410001277923584,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.2045000046491623,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.05909999832510948,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.5774999856948853},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.4991999864578247},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.3580000102519989},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.3560999929904938},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.3506999909877777},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.3474999964237213},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.3160000145435333}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7009999752044678},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.5774999856948853},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.5584999918937683},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.4991999864578247},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.498199999332428},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.3580000102519989},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.3506999909877777},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.3474999964237213},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3409000039100647},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3271999955177307},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.3160000145435333},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.30730000138282776},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.296999990940094},{"id":"https://openalex.org/C3018263672","wikidata":"https://www.wikidata.org/wiki/Q1296251","display_name":"Efficient algorithm","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C2988995629","wikidata":"https://www.wikidata.org/wiki/Q2915729","display_name":"Matrix algebra","level":3,"score":0.2639999985694885},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.259799987077713},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.2581000030040741},{"id":"https://openalex.org/C2778648169","wikidata":"https://www.wikidata.org/wiki/Q967768","display_name":"Compatibility (geochemistry)","level":2,"score":0.25609999895095825}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3731599.3767437","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3731599.3767437","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W6908809","https://openalex.org/W2091734240","https://openalex.org/W2158534713","https://openalex.org/W2791673912","https://openalex.org/W2895305554","https://openalex.org/W2901549770","https://openalex.org/W3044913359","https://openalex.org/W3120361527","https://openalex.org/W3123540889","https://openalex.org/W3132532188","https://openalex.org/W3159160110","https://openalex.org/W3213672119","https://openalex.org/W4212899820","https://openalex.org/W4221160294","https://openalex.org/W4288079730","https://openalex.org/W4309672181","https://openalex.org/W4362675180","https://openalex.org/W4400409880"],"related_works":[],"abstract_inverted_index":{"Tensor":[0],"Cores":[1],"(TCs)":[2],"are":[3,13],"specialized":[4],"hardware":[5],"units":[6],"designed":[7],"for":[8,37,64],"efficient":[9],"matrix":[10],"multiplication":[11],"and":[12,54,82,100],"widely":[14],"utilized":[15],"in":[16,23],"deep":[17],"learning":[18],"workloads.":[19],"However,":[20],"their":[21],"adoption":[22],"more":[24],"irregular":[25,51],"high-performance":[26],"computing":[27],"(HPC)":[28],"applications":[29],"remains":[30],"limited.":[31],"This":[32],"paper":[33],"presents":[34],"a":[35,42,72],"methodology":[36],"effectively":[38],"integrating":[39],"TCs":[40],"into":[41],"representative":[43],"HPC":[44],"application:":[45],"molecular":[46],"docking":[47],"with":[48],"AutoDock-GPU.":[49],"The":[50],"computational":[52],"patterns":[53],"strict":[55],"accuracy":[56],"requirements":[57],"of":[58],"this":[59],"application":[60],"pose":[61],"significant":[62],"challenges":[63],"TC":[65],"utilization.":[66],"To":[67],"address":[68],"these,":[69],"we":[70],"adopt":[71],"twofold":[73],"strategy:":[74],"(i)":[75],"accelerating":[76],"sum":[77],"reduction":[78],"operations":[79],"using":[80],"TCs,":[81],"(ii)":[83],"applying":[84],"state-of-the-art":[85],"numerical":[86],"error":[87],"correction":[88],"(EC)":[89],"techniques":[90],"to":[91],"maintain":[92],"accuracy.":[93,115],"Experimental":[94],"evaluations":[95],"on":[96],"NVIDIA":[97],"A100,":[98],"H100,":[99],"B200":[101],"GPUs":[102],"show":[103],"that":[104],"our":[105],"CUDA-based":[106],"implementation":[107],"consistently":[108],"outperforms":[109],"the":[110],"baseline":[111],"while":[112],"preserving":[113],"algorithmic":[114]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-07T00:00:00"}
