{"id":"https://openalex.org/W4403334642","doi":"https://doi.org/10.1145/3656019.3676894","title":"Leveraging Difference Recurrence Relations for High-Performance GPU Genome Alignment","display_name":"Leveraging Difference Recurrence Relations for High-Performance GPU Genome Alignment","publication_year":2024,"publication_date":"2024-10-11","ids":{"openalex":"https://openalex.org/W4403334642","doi":"https://doi.org/10.1145/3656019.3676894"},"language":"en","primary_location":{"id":"doi:10.1145/3656019.3676894","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3656019.3676894","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3656019.3676894","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005739756","display_name":"Alberto Zeni","orcid":"https://orcid.org/0000-0003-4005-6036"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]},{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT","US"],"is_corresponding":true,"raw_author_name":"Alberto Zeni","raw_affiliation_strings":["Dipartimento di Elettronica, Informazione e Bioingegneria, Politecnico di Milano, Italy, Italy and NVIDIA Corporation, USA"],"affiliations":[{"raw_affiliation_string":"Dipartimento di Elettronica, Informazione e Bioingegneria, Politecnico di Milano, Italy, Italy and NVIDIA Corporation, USA","institution_ids":["https://openalex.org/I4210127875","https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109026423","display_name":"Seth Onken","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Seth Onken","raw_affiliation_strings":["NVIDIA Corporation, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010543929","display_name":"Marco D. Santambrogio","orcid":"https://orcid.org/0000-0002-9883-9693"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marco Domenico Santambrogio","raw_affiliation_strings":["Dipartimento di Elettronica, Informazione e Bioingegneria, Politecnico di Milano, Italy, Italy"],"affiliations":[{"raw_affiliation_string":"Dipartimento di Elettronica, Informazione e Bioingegneria, Politecnico di Milano, Italy, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058642630","display_name":"Mehrzad Samadi","orcid":"https://orcid.org/0000-0002-3581-1255"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mehrzad Samadi","raw_affiliation_strings":["NVIDIA Corporation, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation, USA","institution_ids":["https://openalex.org/I4210127875"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5005739756"],"corresponding_institution_ids":["https://openalex.org/I4210127875","https://openalex.org/I93860229"],"apc_list":null,"apc_paid":null,"fwci":0.2295,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.58012454,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"133","last_page":"143"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9830999970436096,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10222","display_name":"Genomics and Chromatin Dynamics","score":0.9754999876022339,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7336015105247498},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6177166700363159},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.43321746587753296},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.17179784178733826},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.09748607873916626}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7336015105247498},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6177166700363159},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.43321746587753296},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.17179784178733826},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.09748607873916626}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3656019.3676894","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3656019.3676894","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3656019.3676894","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3656019.3676894","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1964471912","https://openalex.org/W2002555321","https://openalex.org/W2011218124","https://openalex.org/W2021342241","https://openalex.org/W2068448872","https://openalex.org/W2074231493","https://openalex.org/W2087064593","https://openalex.org/W2117683095","https://openalex.org/W2132967231","https://openalex.org/W2789843538","https://openalex.org/W3043148535","https://openalex.org/W3136857322","https://openalex.org/W3176565954","https://openalex.org/W4214929667","https://openalex.org/W4225525993","https://openalex.org/W4313193831","https://openalex.org/W4317698849","https://openalex.org/W4385679804","https://openalex.org/W4390280285"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2505380084","https://openalex.org/W2390279801","https://openalex.org/W4400333498","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W1980160788"],"abstract_inverted_index":{"Genome":[0],"pairwise":[1,64],"sequence":[2,56],"alignment":[3,81,133,270],"is":[4,304],"one":[5,152,195],"of":[6,21,24,37,62,77,127,151,153,168,186,196,236,251],"the":[7,22,35,43,47,59,72,78,112,136,154,184,197,204,207,228,249,252,265,272,277,289,293],"most":[8,198],"computationally":[9],"intensive":[10],"workloads":[11],"in":[12,30,203],"many":[13],"genomic":[14,38],"pipelines,":[15],"often":[16],"accounting":[17],"for":[18,49,297],"over":[19,111],"90%":[20],"runtime":[23,212],"critical":[25],"bioinformatics":[26],"applications.":[27],"Recent":[28],"advancements":[29],"sequencing":[31,39],"technologies":[32],"keep":[33],"increasing":[34],"throughput":[36],"data":[40],"while":[41,131],"decreasing":[42],"associated":[44],"cost,":[45],"emphasizing":[46],"need":[48],"fast":[50],"and":[51,99,201,219,231,259,263,268,299],"accurate":[52],"software":[53,115,230,274],"to":[54,91,102,173,216,275],"perform":[55],"analysis,":[57],"given":[58],"quadratic":[60],"complexity":[61],"exact":[63],"algorithms.":[65,156],"In":[66],"this":[67],"challenging":[68],"scenario,":[69],"we":[70,139,158,189,284],"present":[71],"first":[73],"fully":[74],"GPU-accelerated":[75],"version":[76,150,235],"KSW2":[79,155,254,298],"genome":[80],"library.":[82],"Results":[83],"show":[84],"that":[85,301],"our":[86,160,187,191,286,302,308],"high-performance":[87],"implementation":[88,161,167,303],"achieves":[89],"up":[90,101,172,215],"1145.17":[92],"Giga":[93],"Cell":[94],"Updates":[95],"Per":[96],"Second":[97],"(GCUPS)":[98],"speedups":[100,171],"72.83":[103],"\u00d7":[104,143,175,218,221],"on":[105,117,288,307],"a":[106,125,141,147,163,177,223],"single":[107,178,224],"NVIDIA":[108],"Tesla":[109],"H100":[110,179,225,290],"state-of-the-art":[113,148,253],"baseline":[114,229],"running":[116],"two":[118],"Intel":[119],"Xeon":[120],"Platinum":[121],"8358":[122],"processors":[123],"with":[124],"total":[126],"128":[128],"CPU":[129],"threads,":[130],"preserving":[132],"accuracy.":[134],"Using":[135],"same":[137],"configuration,":[138],"demonstrate":[140],"66.00":[142],"speedup,":[144],"versus":[145],"ksw2d-fast,":[146],"improved":[149],"Furthermore,":[157],"compare":[159],"against":[162,227],"recently":[164],"proposed":[165],"FPGA":[166],"ksw2z,":[169],"achieving":[170],"156.37":[174],"using":[176,222],"GPU.":[180],"To":[181],"further":[182,280],"highlight":[183],"impact":[185],"work,":[188],"integrate":[190],"accelerated":[192],"kernels":[193],"within":[194],"used":[199],"aligners":[200],"mappers":[202],"State":[205],"Of":[206],"Art,":[208],"called":[209],"minimap2,":[210],"demonstrating":[211,300],"improvements":[213],"by":[214],"8.51":[217],"8.03":[220],"GPU":[226,310],"mm2-fast,":[232],"an":[233],"optimized":[234],"minimap2":[237],"which":[238],"integrates":[239],"ksw2d-fast":[240],"as":[241,271],"its":[242],"core":[243],"aligner.":[244],"Our":[245],"design":[246],"accelerates":[247],"all":[248],"algorithms":[250],"aligner":[255],"suite":[256],"(splice,":[257],"double-":[258],"single-":[260],"gap":[261],"affine)":[262],"supports":[264],"Z-drop":[266],"heuristic":[267],"banded":[269],"original":[273],"reduce":[276],"processing":[278],"time":[279],"if":[281],"needed.":[282],"Finally,":[283],"evaluate":[285],"application":[287],"GPU,":[291],"adapting":[292],"Berkeley":[294],"Roofline":[295],"model":[296],"near":[305],"optimal":[306],"target":[309],"architecture.":[311]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
