{"id":"https://openalex.org/W7126112966","doi":"https://doi.org/10.1109/bibm66473.2025.11356559","title":"DNAMatch: An Ultra-Fast and Memory-Efficient Deep Learning Framework for Aligning Ultra-Long DNA Fragments","display_name":"DNAMatch: An Ultra-Fast and Memory-Efficient Deep Learning Framework for Aligning Ultra-Long DNA Fragments","publication_year":2025,"publication_date":"2025-12-15","ids":{"openalex":"https://openalex.org/W7126112966","doi":"https://doi.org/10.1109/bibm66473.2025.11356559"},"language":null,"primary_location":{"id":"doi:10.1109/bibm66473.2025.11356559","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm66473.2025.11356559","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009647502","display_name":"Yuansong Zhu","orcid":"https://orcid.org/0000-0003-2208-7322"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuansong Zhu","raw_affiliation_strings":["Harbin Institute of Technology,Faculty of Computing,Harbin,China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology,Faculty of Computing,Harbin,China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031518555","display_name":"Chao Wu","orcid":"https://orcid.org/0000-0001-8258-3227"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chuanmin Wu","raw_affiliation_strings":["Harbin Institute of Technology,Faculty of Computing,Harbin,China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology,Faculty of Computing,Harbin,China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124248017","display_name":"Yadong Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yadong Wang","raw_affiliation_strings":["Harbin Institute of Technology,Faculty of Computing,Harbin,China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology,Faculty of Computing,Harbin,China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101911266","display_name":"Tao Jiang","orcid":"https://orcid.org/0000-0003-3833-4498"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Jiang","raw_affiliation_strings":["Harbin Institute of Technology,Faculty of Computing,Harbin,China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology,Faculty of Computing,Harbin,China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100330474","display_name":"Yi Liu","orcid":"https://orcid.org/0000-0001-9701-6460"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yadong Liu","raw_affiliation_strings":["Harbin Institute of Technology,Faculty of Computing,Harbin,China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology,Faculty of Computing,Harbin,China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5009647502"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.70300897,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"578","last_page":"583"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.8468999862670898,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.8468999862670898,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10222","display_name":"Genomics and Chromatin Dynamics","score":0.04839999973773956,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11213","display_name":"Genomic variations and chromosomal abnormalities","score":0.027899999171495438,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/chromosome","display_name":"Chromosome","score":0.5530999898910522},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5085999965667725},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5054000020027161},{"id":"https://openalex.org/keywords/deep-sequencing","display_name":"Deep sequencing","score":0.4821000099182129},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.4659999907016754},{"id":"https://openalex.org/keywords/contig","display_name":"Contig","score":0.4645000100135803},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.44940000772476196},{"id":"https://openalex.org/keywords/genomics","display_name":"Genomics","score":0.38690000772476196},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.3765000104904175}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6696000099182129},{"id":"https://openalex.org/C30481170","wikidata":"https://www.wikidata.org/wiki/Q37748","display_name":"Chromosome","level":3,"score":0.5530999898910522},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5414999723434448},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5085999965667725},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5054000020027161},{"id":"https://openalex.org/C132917006","wikidata":"https://www.wikidata.org/wiki/Q5250436","display_name":"Deep sequencing","level":4,"score":0.4821000099182129},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.4715999960899353},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.4659999907016754},{"id":"https://openalex.org/C59582021","wikidata":"https://www.wikidata.org/wiki/Q1128751","display_name":"Contig","level":4,"score":0.4645000100135803},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44940000772476196},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.38690000772476196},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.3765000104904175},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.37549999356269836},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.367900013923645},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.36329999566078186},{"id":"https://openalex.org/C151020129","wikidata":"https://www.wikidata.org/wiki/Q7625067","display_name":"Structural variation","level":4,"score":0.34630000591278076},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3287999927997589},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3215999901294708},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.3147999942302704},{"id":"https://openalex.org/C182901222","wikidata":"https://www.wikidata.org/wiki/Q419061","display_name":"Bacterial artificial chromosome","level":4,"score":0.30399999022483826},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C132943942","wikidata":"https://www.wikidata.org/wiki/Q2562511","display_name":"Footprint","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C2779694297","wikidata":"https://www.wikidata.org/wiki/Q2888953","display_name":"Bioconductor","level":3,"score":0.2800000011920929},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.25679999589920044},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2522999942302704},{"id":"https://openalex.org/C180384323","wikidata":"https://www.wikidata.org/wiki/Q16335137","display_name":"Alignment-free sequence analysis","level":5,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bibm66473.2025.11356559","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm66473.2025.11356559","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2415697913","https://openalex.org/W2789843538","https://openalex.org/W2809646241","https://openalex.org/W2963250244","https://openalex.org/W3042377559","https://openalex.org/W3046401484","https://openalex.org/W3101140821","https://openalex.org/W3177485564","https://openalex.org/W4285239178","https://openalex.org/W4366352255","https://openalex.org/W4380685998","https://openalex.org/W4385245566","https://openalex.org/W4402574048","https://openalex.org/W4403597825","https://openalex.org/W4409481553","https://openalex.org/W4411281286","https://openalex.org/W4411571587"],"related_works":[],"abstract_inverted_index":{"Efficient":[0],"and":[1,24,53,96,112,131,150,189,196],"accurate":[2],"alignment":[3,35,65,114,146],"of":[4,15,21,50,115],"DNA":[5,43,117],"fragments":[6,44],"is":[7],"fundamental":[8],"to":[9,46,159,177],"genomics":[10],"research.":[11],"With":[12],"the":[13,19,47,69,93,99,104,145,178],"advent":[14],"advanced":[16],"sequencing":[17,22],"technologies,":[18],"length":[20],"reads":[23,123,195],"assembled":[25],"contigs":[26],"has":[27],"increased":[28],"significantly,":[29],"posing":[30],"substantial":[31],"challenges":[32],"for":[33,73,81,192],"existing":[34],"algorithms.":[36],"These":[37,182],"methods":[38],"often":[39],"struggle":[40],"with":[41,76,168],"megabase-scale":[42],"due":[45],"computational":[48],"burden":[49],"global":[51],"searches":[52],"exhaustive":[54],"chromosomal":[55],"queries.":[56],"To":[57],"address":[58],"this,":[59],"we":[60],"propose":[61],"DNAMatch,":[62],"a":[63,77,86,170,187],"novel":[64],"framework":[66],"that":[67,134],"integrates":[68],"DNABERT2":[70],"pre-trained":[71],"model":[72,126],"feature":[74],"extraction":[75],"deep":[78],"residual":[79],"network":[80],"chromosome":[82,87,142],"identification.":[83],"DNAMatch":[84,135,164,185],"introduces":[85],"pre-localization":[88],"strategy,":[89],"which":[90],"effectively":[91],"narrows":[92],"search":[94],"space":[95],"significantly":[97],"reduces":[98,151],"memory":[100,152],"footprint":[101],"required":[102],"by":[103,148,154],"downstream":[105,160],"aligner,":[106],"minimap2.":[107],"This":[108],"design":[109],"enables":[110],"fast":[111],"precise":[113],"ultra-long":[116,122,194],"fragments.":[118],"Benchmarking":[119],"on":[120],"simulated":[121],"from":[124],"multiple":[125],"organisms-including":[127],"human,":[128],"Drosophila":[129],"melanogaster,":[130],"Arabidopsis":[132],"thaliana-demonstrates":[133],"achieves":[136],"9":[137],"8-9":[138],"9%":[139],"accuracy":[140],"in":[141,174],"identification,":[143],"accelerates":[144],"process":[147],"52.7%,":[149],"usage":[153],"73%.":[155],"Importantly,":[156],"when":[157],"applied":[158],"structural":[161],"variation":[162],"detection,":[163],"maintains":[165],"high":[166],"accuracy,":[167],"only":[169],"marginal":[171],"0.05%":[172],"decrease":[173],"F1-score":[175],"compared":[176],"standard":[179],"minimap2":[180],"pipeline.":[181],"results":[183],"highlight":[184],"as":[186],"powerful":[188],"efficient":[190],"tool":[191],"aligning":[193],"analyzing":[197],"complex":[198],"genomes.":[199]},"counts_by_year":[],"updated_date":"2026-02-23T20:09:44.859080","created_date":"2026-01-30T00:00:00"}
