{"id":"https://openalex.org/W4206562307","doi":"https://doi.org/10.1109/bigdata52589.2021.9671320","title":"Scalable Sequence Clustering for Large-Scale Immune Repertoire Analysis","display_name":"Scalable Sequence Clustering for Large-Scale Immune Repertoire Analysis","publication_year":2021,"publication_date":"2021-12-15","ids":{"openalex":"https://openalex.org/W4206562307","doi":"https://doi.org/10.1109/bigdata52589.2021.9671320"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata52589.2021.9671320","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671320","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071115766","display_name":"Prem Bhusal","orcid":null},"institutions":[{"id":"https://openalex.org/I19648265","display_name":"Wright State University","ror":"https://ror.org/04qk6pt94","country_code":"US","type":"education","lineage":["https://openalex.org/I19648265"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Prem Bhusal","raw_affiliation_strings":["Dept. of Computer Science and Engineering, Wright State University, Dayton, OH, USA"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science and Engineering, Wright State University, Dayton, OH, USA","institution_ids":["https://openalex.org/I19648265"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104173405","display_name":"A K M Mubashwir Alam","orcid":null},"institutions":[{"id":"https://openalex.org/I102461120","display_name":"Marquette University","ror":"https://ror.org/04gr4te78","country_code":"US","type":"education","lineage":["https://openalex.org/I102461120"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"A K M Mubashwir Alam","raw_affiliation_strings":["Department of Computer Science, Marquette University, Milwaukee, WI, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Marquette University, Milwaukee, WI, USA","institution_ids":["https://openalex.org/I102461120"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002572745","display_name":"Keke Chen","orcid":"https://orcid.org/0000-0002-9996-156X"},"institutions":[{"id":"https://openalex.org/I102461120","display_name":"Marquette University","ror":"https://ror.org/04gr4te78","country_code":"US","type":"education","lineage":["https://openalex.org/I102461120"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Keke Chen","raw_affiliation_strings":["Department of Computer Science, Marquette University, Milwaukee, WI, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Marquette University, Milwaukee, WI, USA","institution_ids":["https://openalex.org/I102461120"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052756710","display_name":"Ning Jiang","orcid":"https://orcid.org/0000-0003-1579-3114"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ning Jiang","raw_affiliation_strings":["Department of Bioengineering, University of Pennsylvania, Philadelphia, PA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Bioengineering, University of Pennsylvania, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102813050","display_name":"Jun Xiao","orcid":"https://orcid.org/0000-0001-9437-8933"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jun Xiao","raw_affiliation_strings":["ImmuDX LLC, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"ImmuDX LLC, Austin, TX, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5071115766"],"corresponding_institution_ids":["https://openalex.org/I19648265"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.32899628,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1349","last_page":"1358"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10066","display_name":"Gut microbiota and health","score":0.9731000065803528,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10066","display_name":"Gut microbiota and health","score":0.9731000065803528,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9685999751091003,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10167","display_name":"Influenza Virus Research Studies","score":0.9650999903678894,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/repertoire","display_name":"Repertoire","score":0.6966339349746704},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.684846818447113},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6594432592391968},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6261860132217407},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5435161590576172},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.48431363701820374},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.415335088968277},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3807987570762634},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3231567144393921},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.10057350993156433},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.08992806077003479},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.0760011374950409},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.06848078966140747},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.06610742211341858},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.06176269054412842}],"concepts":[{"id":"https://openalex.org/C2778473898","wikidata":"https://www.wikidata.org/wiki/Q2145110","display_name":"Repertoire","level":2,"score":0.6966339349746704},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.684846818447113},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6594432592391968},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6261860132217407},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5435161590576172},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.48431363701820374},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.415335088968277},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3807987570762634},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3231567144393921},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.10057350993156433},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.08992806077003479},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0760011374950409},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.06848078966140747},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.06610742211341858},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.06176269054412842},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata52589.2021.9671320","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671320","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1549024380","https://openalex.org/W2014419562","https://openalex.org/W2049631158","https://openalex.org/W2060161303","https://openalex.org/W2061959470","https://openalex.org/W2087064593","https://openalex.org/W2097816699","https://openalex.org/W2097921974","https://openalex.org/W2102616711","https://openalex.org/W2120636855","https://openalex.org/W2124351063","https://openalex.org/W2140638310","https://openalex.org/W2142081242","https://openalex.org/W2145853890","https://openalex.org/W2156125289","https://openalex.org/W2163681976","https://openalex.org/W2170936641","https://openalex.org/W2171240476","https://openalex.org/W2189465200","https://openalex.org/W2578129866","https://openalex.org/W2594817103","https://openalex.org/W2616754551","https://openalex.org/W2895397838","https://openalex.org/W2950940950","https://openalex.org/W4231029117","https://openalex.org/W6674878074","https://openalex.org/W6687322159","https://openalex.org/W6731929781"],"related_works":["https://openalex.org/W4255048859","https://openalex.org/W3120300186","https://openalex.org/W2084831119","https://openalex.org/W4378635687","https://openalex.org/W2055491483","https://openalex.org/W2278124082","https://openalex.org/W2019758535","https://openalex.org/W2026305825","https://openalex.org/W2093107729","https://openalex.org/W2795428823"],"abstract_inverted_index":{"The":[0,205,231,246,271],"development":[1],"of":[2,29,57,68,86],"the":[3,19,27,39,45,90,128,137,143,148,180,213,219,227,254,257,267,290,307,311,325],"next-generation":[4],"sequencing":[5,60],"technology":[6],"has":[7,107],"enabled":[8],"systems":[9],"immunology":[10,116],"researchers":[11,24],"to":[12,25,73,99,110,125,178,289,297],"conduct":[13],"detailed":[14],"immune":[15,32,58,80],"repertoire":[16,59],"analysis":[17,52,321],"at":[18],"molecular":[20],"level":[21],"that":[22,38,153,176],"allows":[23],"understand":[26,75],"healthiness":[28],"a":[30,54,77,168,236],"patient\u2019s":[31],"system.":[33],"Recent":[34],"studies":[35],"have":[36,189],"shown":[37],"single-linkage":[40,92,129,139,150,183,282,291],"clustering":[41,93,184,187,208,220,268,283,300],"algorithm":[42,94,130,140,151,209,274,292],"can":[43,239],"give":[44],"best":[46],"results":[47],"for":[48,115,131,157,163,182,199,216,244],"B":[49,200,312],"cell":[50,201,313],"clonality":[51,202,314],"\u2013":[53,174],"critical":[55],"type":[56],"(IR-Seq)":[61],"analysis.":[62,203],"Large":[63],"sequence":[64,102,132,164,171,197,319],"datasets":[65,198],"(e.g.,":[66,210],"millions":[67],"sequences)":[69],"are":[70],"being":[71],"collected":[72],"comprehensively":[74],"how":[76],"specific":[78,308],"person\u2019s":[79],"system":[81],"evolves":[82],"over":[83],"different":[84,123],"stages":[85],"disease":[87],"development.":[88,119],"However,":[89,261,285],"classical":[91],"does":[95],"not":[96],"scale":[97,126],"well":[98,223],"such":[100],"large":[101],"datasets.":[103],"Surprisingly,":[104],"no":[105],"study":[106,121,304],"been":[108],"done":[109],"address":[111],"this":[112,262,303],"scalability":[113],"issue":[114],"research":[117],"and":[118,160,166,193,278,293],"We":[120],"three":[122],"strategies":[124],"up":[127],"data.":[133],"They":[134],"include":[135],"(1)":[136,204],"approximate":[138],"enhanced":[141],"with":[142,185,195],"non-Euclidean":[144],"indexing":[145],"methods,":[146],"(2)":[147,230],"Spark-based":[149],"(SparkMST)":[152],"was":[154],"originally":[155],"designed":[156],"vector":[158],"data":[159,181,242,320],"now":[161],"modified":[162],"data,":[165],"(3)":[167,270],"new":[169],"tree-based":[170],"summarization":[172],"approach":[173,233,263],"SCT":[175,232,249],"aims":[177],"reduce":[179,241],"well-preserved":[186],"quality.We":[188],"implemented":[190],"these":[191],"approaches":[192],"experimented":[194],"real":[196],"index-enhanced":[206],"hierarchical":[207,299],"VPT-HC":[211],"using":[212],"Vantage-Point":[214],"tree":[215],"indexing)":[217],"preserves":[218],"quality":[221],"very":[222],"while":[224],"significantly":[225],"reducing":[226],"time":[228],"complexity.":[229],"serving":[234],"as":[235],"preprocessing":[237],"step":[238],"effectively":[240],"size":[243],"clustering.":[245],"overall":[247],"clustering,":[248],"followed":[250],"by":[251],"VPT-HC,":[252],"is":[253,287],"fastest":[255],"among":[256],"evaluated":[258],"single-machine":[259],"algorithms.":[260,301],"also":[264,279],"slightly":[265],"affects":[266],"quality.":[269],"SparkMST":[272,286],"parallel":[273],"scales":[275],"out":[276],"nicely":[277],"gives":[280],"exact":[281],"results.":[284],"tied":[288],"cannot":[294],"be":[295],"extended":[296],"general":[298],"Although":[302],"focused":[305],"on":[306],"application":[309],"area:":[310],"analysis,":[315],"we":[316],"believe":[317],"other":[318],"problems":[322],"may":[323],"find":[324],"developed":[326],"scalable":[327],"techniques":[328],"useful.":[329]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
