{"id":"https://openalex.org/W65662874","doi":"https://doi.org/10.1137/1.9781611972733.30","title":"Detection of Underrepresented Biological Sequences Using Class-Conditional Distribution Models","display_name":"Detection of Underrepresented Biological Sequences Using Class-Conditional Distribution Models","publication_year":2003,"publication_date":"2003-05-01","ids":{"openalex":"https://openalex.org/W65662874","doi":"https://doi.org/10.1137/1.9781611972733.30","mag":"65662874"},"language":"en","primary_location":{"id":"doi:10.1137/1.9781611972733.30","is_oa":false,"landing_page_url":"https://doi.org/10.1137/1.9781611972733.30","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2003 SIAM International Conference on Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059847153","display_name":"Slobodan Vu\u010deti\u0107","orcid":"https://orcid.org/0000-0001-5884-6293"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Slobodan Vucetic","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113935478","display_name":"Dragoljub Pokrajac","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dragoljub Pokrajac","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015750970","display_name":"Hongbo Xie","orcid":"https://orcid.org/0000-0003-2223-0029"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hongbo Xie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5044038055","display_name":"Zoran Obradovi\u0107","orcid":"https://orcid.org/0000-0002-2051-0142"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zoran Obradovic","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5059847153"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5719,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.62708597,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"279","last_page":"283"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9735000133514404,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.6865623593330383},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.6015494465827942},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5939324498176575},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5711728930473328},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.564598560333252},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5286380052566528},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5115092992782593},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.5114206671714783},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.49257737398147583},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4872411787509918},{"id":"https://openalex.org/keywords/sampling-bias","display_name":"Sampling bias","score":0.4796198308467865},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.381683349609375},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36427828669548035},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3253195881843567},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3015891909599304},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.20112386345863342},{"id":"https://openalex.org/keywords/sample-size-determination","display_name":"Sample size determination","score":0.13325795531272888},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.10111874341964722}],"concepts":[{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.6865623593330383},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.6015494465827942},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5939324498176575},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5711728930473328},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.564598560333252},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5286380052566528},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5115092992782593},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.5114206671714783},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.49257737398147583},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4872411787509918},{"id":"https://openalex.org/C75917345","wikidata":"https://www.wikidata.org/wiki/Q2725298","display_name":"Sampling bias","level":3,"score":0.4796198308467865},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.381683349609375},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36427828669548035},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3253195881843567},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3015891909599304},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.20112386345863342},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.13325795531272888},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.10111874341964722},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1137/1.9781611972733.30","is_oa":false,"landing_page_url":"https://doi.org/10.1137/1.9781611972733.30","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2003 SIAM International Conference on Data Mining","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.697.2217","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.697.2217","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.dabi.temple.edu/%7Ezoran/papers/vucetic_siam03.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.8.9575","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.8.9575","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.ist.temple.edu/~vucetic/documents/vucetic03sdm.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1503432700","https://openalex.org/W2003144438","https://openalex.org/W2041862730","https://openalex.org/W2061833373","https://openalex.org/W2073745520","https://openalex.org/W2110110505","https://openalex.org/W2147809382","https://openalex.org/W2158714788"],"related_works":["https://openalex.org/W2159790760","https://openalex.org/W4400127305","https://openalex.org/W2299309501","https://openalex.org/W2072676583","https://openalex.org/W1997937452","https://openalex.org/W2970859251","https://openalex.org/W3083491872","https://openalex.org/W2052665155","https://openalex.org/W3193874062","https://openalex.org/W2918060409"],"abstract_inverted_index":{"A":[0],"labeled":[1,85],"sequence":[2],"data":[3,31,118],"set":[4,119],"related":[5],"to":[6,91],"a":[7,30,50,61,77],"certain":[8],"biological":[9,142],"property":[10],"is":[11,34,43,109],"often":[12],"biased":[13],"and,":[14],"therefore,":[15],"does":[16],"not":[17],"completely":[18],"capture":[19],"its":[20],"diversity":[21],"in":[22,141],"nature.":[23],"To":[24],"reduce":[25],"this":[26],"sampling":[27,139],"bias":[28,140],"problem":[29,114],"mining":[32],"procedure":[33,42,72],"proposed":[35,107,132],"for":[36,134],"detecting":[37],"underrepresented":[38,66,94],"relevant":[39],"sequences.":[40,70],"The":[41,124],"aimed":[44],"at":[45],"helping":[46],"domain":[47],"experts":[48],"achieve":[49],"cost-effective":[51],"qualitative":[52],"enlargement":[53],"of":[54,60,64,84,105,115,120,130,138],"knowledge":[55],"through":[56],"an":[57,112,135],"in-depth":[58],"study":[59],"small":[62],"number":[63],"statistically":[65,93],"and":[67,97],"functionally":[68],"interesting":[69],"Our":[71],"consists":[73],"of:":[74],"(i)":[75],"learning":[76],"class-conditional":[78],"distribution":[79],"model":[80],"on":[81,111],"each":[82],"class":[83],"data;":[86],"(ii)":[87],"applying":[88],"the":[89,106,117,128,131],"models":[90],"select":[92],"unlabeled":[95],"sequences;":[96],"(iii)":[98],"automatically":[99],"evaluating":[100],"their":[101],"interestingness.":[102],"An":[103],"application":[104],"approach":[108,133],"illustrated":[110],"important":[113],"increasing":[116],"confirmed":[121],"disordered":[122],"proteins.":[123],"obtained":[125],"results":[126],"demonstrate":[127],"promise":[129],"efficient":[136],"reduction":[137],"databases.":[143]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
