{"id":"https://openalex.org/W7126048080","doi":"https://doi.org/10.1109/bibm66473.2025.11356050","title":"Concisemizer: An Asymmetric Redundancy-Removal Algorithm for Efficient Seed Sampling in Large-Scale Sequence Alignment","display_name":"Concisemizer: An Asymmetric Redundancy-Removal Algorithm for Efficient Seed Sampling in Large-Scale Sequence Alignment","publication_year":2025,"publication_date":"2025-12-15","ids":{"openalex":"https://openalex.org/W7126048080","doi":"https://doi.org/10.1109/bibm66473.2025.11356050"},"language":null,"primary_location":{"id":"doi:10.1109/bibm66473.2025.11356050","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm66473.2025.11356050","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004355908","display_name":"Hui Gao","orcid":"https://orcid.org/0000-0003-0162-2445"},"institutions":[{"id":"https://openalex.org/I143413998","display_name":"Qingdao University of Science and Technology","ror":"https://ror.org/041j8js14","country_code":"CN","type":"education","lineage":["https://openalex.org/I143413998"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hao Gao","raw_affiliation_strings":["College of Computer Science and Technology, Qingdao University,Qingdao,China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Qingdao University,Qingdao,China","institution_ids":["https://openalex.org/I143413998"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yangyang Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I143413998","display_name":"Qingdao University of Science and Technology","ror":"https://ror.org/041j8js14","country_code":"CN","type":"education","lineage":["https://openalex.org/I143413998"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yangyang Sun","raw_affiliation_strings":["College of Computer Science and Technology, Qingdao University,Qingdao,China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Qingdao University,Qingdao,China","institution_ids":["https://openalex.org/I143413998"]}]},{"author_position":"last","author":{"id":null,"display_name":"Xiaoquan Su","orcid":null},"institutions":[{"id":"https://openalex.org/I143413998","display_name":"Qingdao University of Science and Technology","ror":"https://ror.org/041j8js14","country_code":"CN","type":"education","lineage":["https://openalex.org/I143413998"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoquan Su","raw_affiliation_strings":["College of Computer Science and Technology, Qingdao University,Qingdao,China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Qingdao University,Qingdao,China","institution_ids":["https://openalex.org/I143413998"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5004355908"],"corresponding_institution_ids":["https://openalex.org/I143413998"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.66711922,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9074000120162964,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9074000120162964,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11213","display_name":"Genomic variations and chromosomal abnormalities","score":0.03189999982714653,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13664","display_name":"Genome Rearrangement Algorithms","score":0.030300000682473183,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.8130999803543091},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6894999742507935},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6873000264167786},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5108000040054321},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.5095999836921692},{"id":"https://openalex.org/keywords/multiple-sequence-alignment","display_name":"Multiple sequence alignment","score":0.39969998598098755},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.385699987411499},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.3709999918937683}],"concepts":[{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.8130999803543091},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6894999742507935},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6873000264167786},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.612500011920929},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5386000275611877},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5108000040054321},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.5095999836921692},{"id":"https://openalex.org/C88031987","wikidata":"https://www.wikidata.org/wiki/Q1377767","display_name":"Multiple sequence alignment","level":5,"score":0.39969998598098755},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.385699987411499},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.3709999918937683},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.35040000081062317},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3449000120162964},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.3319000005722046},{"id":"https://openalex.org/C3018263672","wikidata":"https://www.wikidata.org/wiki/Q1296251","display_name":"Efficient algorithm","level":2,"score":0.32519999146461487},{"id":"https://openalex.org/C45484198","wikidata":"https://www.wikidata.org/wiki/Q827246","display_name":"Sequence alignment","level":4,"score":0.3165999948978424},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.31369999051094055},{"id":"https://openalex.org/C4668613","wikidata":"https://www.wikidata.org/wiki/Q4116110","display_name":"Structural alignment","level":5,"score":0.3003999888896942},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C192953774","wikidata":"https://www.wikidata.org/wiki/Q7307127","display_name":"Reference genome","level":4,"score":0.2791999876499176},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C106516650","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm design","level":2,"score":0.25999999046325684}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bibm66473.2025.11356050","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm66473.2025.11356050","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1981014117","https://openalex.org/W2015292449","https://openalex.org/W2029195137","https://openalex.org/W2055043387","https://openalex.org/W2074231493","https://openalex.org/W2087064593","https://openalex.org/W2108234281","https://openalex.org/W2111295912","https://openalex.org/W2136145671","https://openalex.org/W2144560237","https://openalex.org/W2152956782","https://openalex.org/W2487384794","https://openalex.org/W2607669908","https://openalex.org/W2789843538","https://openalex.org/W2792368734","https://openalex.org/W2950121474","https://openalex.org/W2950572599","https://openalex.org/W2951160681","https://openalex.org/W2951822379","https://openalex.org/W2951912016","https://openalex.org/W2990618091","https://openalex.org/W3114310315","https://openalex.org/W3117679888","https://openalex.org/W3128964673","https://openalex.org/W3182040180","https://openalex.org/W4224215085","https://openalex.org/W4295443308","https://openalex.org/W4311803917","https://openalex.org/W4318193482","https://openalex.org/W4321611856","https://openalex.org/W4375863320","https://openalex.org/W4391211781","https://openalex.org/W4395000446","https://openalex.org/W4403379006","https://openalex.org/W4404225840"],"related_works":[],"abstract_inverted_index":{"Modern":[0],"sequence":[1,166],"alignment":[2,39,106],"tools":[3],"rely":[4],"on":[5,35,110,146],"seed-based":[6],"anchoring":[7],"to":[8,59,132],"efficiently":[9],"align":[10],"large-scale":[11,165],"sequencing":[12],"data.":[13],"Minimizers":[14],"are":[15],"widely":[16],"used":[17],"for":[18,172],"seed":[19,43,61,78,102,119,153],"extraction":[20,154],"but":[21],"often":[22],"generate":[23],"excessive":[24],"and":[25,66,121,162],"redundant":[26],"seeds":[27,68],"that":[28,63,85],"each":[29],"base":[30],"is":[31],"typically":[32],"sampled":[33],"twice":[34],"average,":[36],"thereby":[37,158],"limiting":[38],"throughput.":[40],"To":[41],"address":[42],"redundancy,":[44],"we":[45],"propose":[46],"Concisemizer,":[47],"an":[48,82],"asymmetric,":[49],"simultaneous":[50],"de-redundancy":[51],"algorithm.":[52],"Concisemizer":[53,100,130,148],"introduces":[54],"positional":[55],"filtering":[56],"steps":[57],"applied":[58],"sliding":[60],"triples":[62],"simultaneously":[64],"extract":[65],"filter":[67],"from":[69,89],"the":[70,90,97,124,160],"reference":[71,91],"sequence,":[72],"resulting":[73],"in":[74,96,118,127,139,152,175],"a":[75,133],"more":[76],"efficient":[77],"set.":[79],"By":[80],"employing":[81],"asymmetric":[83],"strategy":[84],"removes":[86],"redundancy":[87,151],"only":[88],"sequences":[92],"while":[93,104,155],"retaining":[94],"minimizers":[95],"query":[98],"sequences,":[99],"reduces":[101,150],"count":[103],"preserving":[105],"sensitivity.":[107],"Comparative":[108],"evaluations":[109],"six":[111],"model":[112],"species":[113],"demonstrated":[114],"Concisemizer's":[115],"superior":[116],"performance":[117],"density":[120],"repetition.":[122],"Replacing":[123],"minimizer":[125],"module":[126],"minimap2":[128],"with":[129,143],"led":[131],"<tex":[134],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[135],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$9.6":[136],"\\times$</tex>":[137],"speedup":[138],"human":[140],"genome":[141],"alignment,":[142],"minimal":[144],"impact":[145],"accuracy.":[147],"substantially":[149],"maintaining":[156],"accuracy,":[157],"improving":[159],"efficiency":[161],"scalability":[163],"of":[164],"alignment.":[167],"It":[168],"holds":[169],"strong":[170],"potential":[171],"widespread":[173],"adoption":[174],"genomic":[176],"analysis":[177],"pipelines.":[178]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2026-01-30T00:00:00"}
