{"id":"https://openalex.org/W2144990395","doi":"https://doi.org/10.14778/1687627.1687651","title":"Reference-based alignment in large sequence databases","display_name":"Reference-based alignment in large sequence databases","publication_year":2009,"publication_date":"2009-08-01","ids":{"openalex":"https://openalex.org/W2144990395","doi":"https://doi.org/10.14778/1687627.1687651","mag":"2144990395"},"language":"en","primary_location":{"id":"doi:10.14778/1687627.1687651","is_oa":false,"landing_page_url":"https://doi.org/10.14778/1687627.1687651","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044999523","display_name":"Panagiotis Papapetrou","orcid":"https://orcid.org/0000-0002-4632-4815"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Panagiotis Papapetrou","raw_affiliation_strings":["Boston University"],"affiliations":[{"raw_affiliation_string":"Boston University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025213339","display_name":"Vassilis Athitsos","orcid":"https://orcid.org/0000-0002-1281-6168"},"institutions":[{"id":"https://openalex.org/I189196454","display_name":"The University of Texas at Arlington","ror":"https://ror.org/019kgqr73","country_code":"US","type":"education","lineage":["https://openalex.org/I189196454"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vassilis Athitsos","raw_affiliation_strings":["University of Texas at Arlington"],"affiliations":[{"raw_affiliation_string":"University of Texas at Arlington","institution_ids":["https://openalex.org/I189196454"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061270013","display_name":"George Kollios","orcid":"https://orcid.org/0009-0004-1837-8498"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"George Kollios","raw_affiliation_strings":["Boston University"],"affiliations":[{"raw_affiliation_string":"Boston University","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063685438","display_name":"Dimitrios Gunopulos","orcid":"https://orcid.org/0000-0001-6339-1879"},"institutions":[{"id":"https://openalex.org/I200777214","display_name":"National and Kapodistrian University of Athens","ror":"https://ror.org/04gnjpq42","country_code":"GR","type":"education","lineage":["https://openalex.org/I200777214"]},{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["GR","US"],"is_corresponding":false,"raw_author_name":"Dimitrios Gunopulos","raw_affiliation_strings":["University of Athens and UC Riverside","University of Athens and UC Riverside#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Athens and UC Riverside","institution_ids":["https://openalex.org/I103635307","https://openalex.org/I200777214"]},{"raw_affiliation_string":"University of Athens and UC Riverside#TAB#","institution_ids":["https://openalex.org/I200777214"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5044999523"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.7098,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.91343444,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"2","issue":"1","first_page":"205","last_page":"216"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/subsequence","display_name":"Subsequence","score":0.7341139316558838},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.6333249807357788},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6327425837516785},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.5959840416908264},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5831871032714844},{"id":"https://openalex.org/keywords/longest-common-subsequence-problem","display_name":"Longest common subsequence problem","score":0.5788629651069641},{"id":"https://openalex.org/keywords/edit-distance","display_name":"Edit distance","score":0.5744308829307556},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5086575150489807},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.4725843667984009},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4703778624534607},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.457765132188797},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.38645702600479126},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3628944754600525},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.32386064529418945},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2384694516658783},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.07574251294136047}],"concepts":[{"id":"https://openalex.org/C137877099","wikidata":"https://www.wikidata.org/wiki/Q1332977","display_name":"Subsequence","level":3,"score":0.7341139316558838},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.6333249807357788},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6327425837516785},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.5959840416908264},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5831871032714844},{"id":"https://openalex.org/C120098539","wikidata":"https://www.wikidata.org/wiki/Q141001","display_name":"Longest common subsequence problem","level":2,"score":0.5788629651069641},{"id":"https://openalex.org/C44359876","wikidata":"https://www.wikidata.org/wiki/Q5338467","display_name":"Edit distance","level":2,"score":0.5744308829307556},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5086575150489807},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.4725843667984009},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4703778624534607},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.457765132188797},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38645702600479126},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3628944754600525},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32386064529418945},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2384694516658783},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.07574251294136047},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.14778/1687627.1687651","is_oa":false,"landing_page_url":"https://doi.org/10.14778/1687627.1687651","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.151.4826","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.151.4826","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.vldb.org/pvldb/2/vldb09-320.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.151.5793","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.151.5793","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://crystal.uta.edu/~athitsos/publications/papapetrou_vldb2009.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.207.3613","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.207.3613","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cis.hut.fi/panagpap/papapetrou_vldb2009.pdf","raw_type":"text"},{"id":"pmh:oai:eprints.bbk.ac.uk.oai2:7440","is_oa":false,"landing_page_url":"http://www.vldb.org/pvldb/2/vldb09-320.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400466","display_name":"BIROn (Birkbeck, University of London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98259816","host_organization_name":"Birkbeck, University of London","host_organization_lineage":["https://openalex.org/I98259816"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W125979907","https://openalex.org/W1490635997","https://openalex.org/W1557257771","https://openalex.org/W1647671624","https://openalex.org/W1714821586","https://openalex.org/W1873119440","https://openalex.org/W1972418517","https://openalex.org/W1985108724","https://openalex.org/W2000390755","https://openalex.org/W2008689179","https://openalex.org/W2011632873","https://openalex.org/W2015292449","https://openalex.org/W2028445324","https://openalex.org/W2029195137","https://openalex.org/W2035399801","https://openalex.org/W2039249321","https://openalex.org/W2055043387","https://openalex.org/W2055666215","https://openalex.org/W2059513841","https://openalex.org/W2060873943","https://openalex.org/W2074231493","https://openalex.org/W2086022864","https://openalex.org/W2087064593","https://openalex.org/W2103717317","https://openalex.org/W2108235229","https://openalex.org/W2115321097","https://openalex.org/W2119057313","https://openalex.org/W2123020735","https://openalex.org/W2129713651","https://openalex.org/W2131329986","https://openalex.org/W2133706543","https://openalex.org/W2134826720","https://openalex.org/W2136145671","https://openalex.org/W2142619120","https://openalex.org/W2143642163","https://openalex.org/W2151214225","https://openalex.org/W2154931799","https://openalex.org/W2158714788","https://openalex.org/W2158874082","https://openalex.org/W2165480965","https://openalex.org/W2167847032","https://openalex.org/W2169353508","https://openalex.org/W2436540252","https://openalex.org/W2752061190","https://openalex.org/W2752853835","https://openalex.org/W6628896446","https://openalex.org/W6744529318"],"related_works":["https://openalex.org/W2163993443","https://openalex.org/W2998685384","https://openalex.org/W2565438332","https://openalex.org/W2250447163","https://openalex.org/W3031506718","https://openalex.org/W2132372003","https://openalex.org/W2110723042","https://openalex.org/W2432338705","https://openalex.org/W2158495000","https://openalex.org/W2091010252"],"abstract_inverted_index":{"This":[0],"paper":[1],"introduces":[2],"a":[3,44,57,118,163],"novel":[4],"method,":[5],"called":[6],"Reference-Based":[7],"String":[8],"Alignment":[9],"(RBSA),":[10],"that":[11,38,53,69,88,138],"speeds":[12],"up":[13],"retrieval":[14,101],"of":[15,22,60,83,106,120,134,139,145,167,182],"optimal":[16,40],"subsequence":[17,169],"matches":[18,108],"in":[19,100],"large":[20,77],"databases":[21],"sequences":[23,157],"under":[24],"the":[25,29,36,39,49,61,94,114,127,140,148,152,155,179,183],"edit":[26],"distance":[27],"and":[28,74,117,154,202],"Smith-Waterman":[30],"similarity":[31],"measure.":[32],"RBSA":[33,64,84,103,190],"operates":[34],"using":[35,109],"assumption":[37],"match":[41],"deviates":[42],"by":[43],"relatively":[45,164],"small":[46,165],"amount":[47,52],"from":[48],"query,":[50],"an":[51,66],"does":[54],"not":[55],"exceed":[56],"prespecified":[58],"fraction":[59],"query":[62,125,128],"length.":[63],"has":[65],"exact":[67,95],"version":[68,82],"guarantees":[70],"no":[71],"false":[72],"dismissals":[73],"can":[75],"handle":[76],"queries":[78],"efficiently.":[79],"An":[80,171],"approximate":[81],"is":[85,130,175],"also":[86],"described,":[87],"achieves":[89],"significant":[90],"additional":[91],"improvements":[92],"over":[93],"version,":[96],"with":[97],"negligible":[98],"losses":[99],"accuracy.":[102],"performs":[104],"filtering":[105],"candidate":[107,168],"precomputed":[110],"alignment":[111,149,196],"scores":[112,150],"between":[113,151],"database":[115],"sequence":[116,129,195],"set":[119],"fixed-length":[121],"reference":[122,141,156],"sequences.":[123,142],"At":[124],"time,":[126],"partitioned":[131],"into":[132],"segments":[133],"length":[135],"equal":[136],"to":[137,160,177],"For":[143],"each":[144],"those":[146],"segments,":[147],"segment":[153],"are":[158],"used":[159],"efficiently":[161],"identify":[162],"number":[166],"matches.":[170],"alphabet":[172],"collapsing":[173],"technique":[174],"employed":[176],"improve":[178],"pruning":[180],"power":[181],"filter":[184],"step.":[185],"In":[186],"our":[187],"experimental":[188],"evaluation,":[189],"significantly":[191],"outperforms":[192],"state-of-the-art":[193],"biological":[194],"methods,":[197],"such":[198],"as":[199],"q-grams,":[200],"BLAST,":[201],"BWT.":[203]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":4}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
