{"id":"https://openalex.org/W4320024298","doi":"https://doi.org/10.1109/bigdata55660.2022.10020217","title":"Informative Initialization and Kernel Selection Improves t-SNE for Biological Sequences","display_name":"Informative Initialization and Kernel Selection Improves t-SNE for Biological Sequences","publication_year":2022,"publication_date":"2022-12-17","ids":{"openalex":"https://openalex.org/W4320024298","doi":"https://doi.org/10.1109/bigdata55660.2022.10020217"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata55660.2022.10020217","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata55660.2022.10020217","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017366862","display_name":"Prakash Chourasia","orcid":"https://orcid.org/0000-0002-1443-2192"},"institutions":[{"id":"https://openalex.org/I181565077","display_name":"Georgia State University","ror":"https://ror.org/03qt6ba18","country_code":"US","type":"education","lineage":["https://openalex.org/I181565077"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Prakash Chourasia","raw_affiliation_strings":["Georgia State University,Department of Computer Science,Atlanta,GA,USA","Department of Computer Science, Georgia State University, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia State University,Department of Computer Science,Atlanta,GA,USA","institution_ids":["https://openalex.org/I181565077"]},{"raw_affiliation_string":"Department of Computer Science, Georgia State University, Atlanta, GA, USA","institution_ids":["https://openalex.org/I181565077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064858842","display_name":"Sarwan Ali","orcid":"https://orcid.org/0000-0001-8121-2168"},"institutions":[{"id":"https://openalex.org/I181565077","display_name":"Georgia State University","ror":"https://ror.org/03qt6ba18","country_code":"US","type":"education","lineage":["https://openalex.org/I181565077"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sarwan Ali","raw_affiliation_strings":["Georgia State University,Department of Computer Science,Atlanta,GA,USA","Department of Computer Science, Georgia State University, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia State University,Department of Computer Science,Atlanta,GA,USA","institution_ids":["https://openalex.org/I181565077"]},{"raw_affiliation_string":"Department of Computer Science, Georgia State University, Atlanta, GA, USA","institution_ids":["https://openalex.org/I181565077"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026228482","display_name":"Murray Patterson","orcid":"https://orcid.org/0000-0002-4329-0234"},"institutions":[{"id":"https://openalex.org/I181565077","display_name":"Georgia State University","ror":"https://ror.org/03qt6ba18","country_code":"US","type":"education","lineage":["https://openalex.org/I181565077"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Murray Patterson","raw_affiliation_strings":["Georgia State University,Department of Computer Science,Atlanta,GA,USA","Department of Computer Science, Georgia State University, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia State University,Department of Computer Science,Atlanta,GA,USA","institution_ids":["https://openalex.org/I181565077"]},{"raw_affiliation_string":"Department of Computer Science, Georgia State University, Atlanta, GA, USA","institution_ids":["https://openalex.org/I181565077"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5017366862"],"corresponding_institution_ids":["https://openalex.org/I181565077"],"apc_list":null,"apc_paid":null,"fwci":4.4092,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.96281646,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"101","last_page":"106"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10640","display_name":"Spectroscopy and Chemometric Analyses","score":0.9864000082015991,"subfield":{"id":"https://openalex.org/subfields/1602","display_name":"Analytical Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.9088003635406494},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6846407651901245},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6694690585136414},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.6125672459602356},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.5584095120429993},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.44481369853019714},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38467782735824585},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.38327741622924805},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34849485754966736},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2867242097854614},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.09837546944618225},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.09675461053848267}],"concepts":[{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.9088003635406494},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6846407651901245},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6694690585136414},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.6125672459602356},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.5584095120429993},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.44481369853019714},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38467782735824585},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38327741622924805},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34849485754966736},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2867242097854614},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.09837546944618225},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.09675461053848267},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata55660.2022.10020217","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata55660.2022.10020217","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W2013736751","https://openalex.org/W2057491655","https://openalex.org/W2088803186","https://openalex.org/W2089468765","https://openalex.org/W2123649031","https://openalex.org/W2157444450","https://openalex.org/W2187089797","https://openalex.org/W2740590008","https://openalex.org/W2907464407","https://openalex.org/W3009335299","https://openalex.org/W3127452014","https://openalex.org/W3167017974","https://openalex.org/W3188178217","https://openalex.org/W3192651086","https://openalex.org/W3196141744","https://openalex.org/W3198971816","https://openalex.org/W3216778212","https://openalex.org/W3217347027","https://openalex.org/W4214918942","https://openalex.org/W4220693339","https://openalex.org/W4295759516","https://openalex.org/W6683161245","https://openalex.org/W6757767656"],"related_works":["https://openalex.org/W3204184292","https://openalex.org/W3176564347","https://openalex.org/W1985458517","https://openalex.org/W2355833770","https://openalex.org/W3031039437","https://openalex.org/W3095877357","https://openalex.org/W183202219","https://openalex.org/W2072565696","https://openalex.org/W10861731","https://openalex.org/W2050451745"],"abstract_inverted_index":{"The":[0],"t-distributed":[1],"stochastic":[2],"neighbor":[3],"embedding":[4],"(t-SNE)":[5],"is":[6,43,110,131],"a":[7,20,112,115,147],"method":[8],"for":[9,195],"interpreting":[10],"high":[11],"dimensional":[12,22],"(HD)":[13],"data":[14],"by":[15,93,133,234],"mapping":[16],"each":[17],"point":[18],"to":[19,29,65,81,221,260],"low":[21],"(LD)":[23],"space":[24],"(usually":[25],"two-dimensional).":[26],"It":[27],"seeks":[28],"retain":[30],"the":[31,34,40,44,50,67,108,119,123,128,136,151,159,191,198,212,225,229],"structure":[32],"of":[33,39,53,101,107,153,161,175,197,207],"data.":[35],"An":[36],"important":[37],"component":[38,106],"t-SNE":[41,109,162,214,247,255],"algorithm":[42],"initialization":[45,52,166,241],"procedure,":[46],"which":[47,176],"begins":[48],"with":[49,163,228,263],"random":[51],"an":[54],"LD":[55],"vector.":[56],"Points":[57],"in":[58,135,150],"this":[59,155],"initial":[60],"vector":[61],"are":[62,178],"then":[63],"updated":[64],"minimize":[66],"loss":[68],"function":[69],"(the":[70],"KL":[71],"divergence)":[72],"iteratively":[73],"using":[74,111,170,235],"gradient":[75],"descent.":[76],"This":[77],"leads":[78],"comparable":[79],"points":[80,88],"attract":[82],"one":[83],"another":[84],"while":[85],"pushing":[86],"dissimilar":[87],"apart.":[89],"We":[90,201,210,231],"believe":[91],"that,":[92],"default,":[94],"these":[95,208],"algorithms":[96],"should":[97],"employ":[98],"some":[99],"form":[100],"informative":[102],"initialization.":[103,266],"Another":[104],"essential":[105],"kernel":[113,130,142,243],"matrix,":[114],"similarity":[116],"matrix":[117,244],"comprising":[118],"pairwise":[120],"distances":[121],"among":[122],"sequences.":[124],"For":[125],"t-SNE-based":[126],"visualization,":[127],"Gaussian":[129],"employed":[132],"default":[134],"literature.":[137],"However,":[138],"we":[139,157,252],"show":[140,232,253],"that":[141,233,246,254],"selection":[143],"can":[144],"also":[145,256],"play":[146],"crucial":[148],"role":[149],"performance":[152,160],"t-SNE.In":[154],"work,":[156],"assess":[158],"various":[164,187],"alternative":[165],"methods":[167,227],"and":[168,204,216,223,242],"kernels,":[169],"four":[171],"different":[172,236],"sets,":[173],"out":[174],"three":[177],"biological":[179],"sequences":[180,196],"(nucleotide,":[181],"protein,":[182],"etc.)":[183],"datasets":[184],"obtained":[185],"from":[186],"sources,":[188],"such":[189,238],"as":[190,239],"well-known":[192],"GISAID":[193],"database":[194],"SARS-CoV-2":[199],"virus.":[200],"perform":[202],"subjective":[203],"objective":[205],"assessments":[206],"alternatives.":[209],"use":[211],"resulting":[213],"plots":[215],"k-ary":[217],"neighborhood":[218],"agreement":[219],"(k-ANA)":[220],"evaluate":[222],"compare":[224],"proposed":[226],"baselines.":[230],"techniques,":[237],"informed":[240],"selection,":[245],"performs":[248],"significantly":[249],"better.":[250],"Moreover,":[251],"takes":[257],"fewer":[258],"iterations":[259],"converge":[261],"faster":[262],"more":[264],"intelligent":[265]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
