{"id":"https://openalex.org/W2767431106","doi":"https://doi.org/10.1145/3132847.3133051","title":"Learning Biological Sequence Types Using the Literature","display_name":"Learning Biological Sequence Types Using the Literature","publication_year":2017,"publication_date":"2017-11-06","ids":{"openalex":"https://openalex.org/W2767431106","doi":"https://doi.org/10.1145/3132847.3133051","mag":"2767431106"},"language":"en","primary_location":{"id":"doi:10.1145/3132847.3133051","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3132847.3133051","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 ACM on Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://figshare.com/articles/conference_contribution/Learning_biological_sequence_types_using_the_literature/20733229","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018899731","display_name":"Mohamed Reda Bouadjenek","orcid":"https://orcid.org/0000-0003-1807-430X"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Mohamed Reda Bouadjenek","raw_affiliation_strings":["The University of Melbourne, Parkville, Victoria, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Melbourne, Parkville, Victoria, Australia","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067214173","display_name":"Karin Verspoor","orcid":"https://orcid.org/0000-0002-8661-1544"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Karin Verspoor","raw_affiliation_strings":["The University of Melbourne, Parkville, Victoria, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Melbourne, Parkville, Victoria, Australia","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041495909","display_name":"Justin Zobel","orcid":"https://orcid.org/0000-0001-6622-032X"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Justin Zobel","raw_affiliation_strings":["The University of Melbourne, Parkville, Victoria, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Melbourne, Parkville, Victoria, Australia","institution_ids":["https://openalex.org/I165779595"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5018899731"],"corresponding_institution_ids":["https://openalex.org/I165779595"],"apc_list":null,"apc_paid":null,"fwci":0.2245,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.5741012,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1991","last_page":"1994"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7061156034469604},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.6106743216514587},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5500137805938721},{"id":"https://openalex.org/keywords/intuition","display_name":"Intuition","score":0.45330557227134705},{"id":"https://openalex.org/keywords/data-type","display_name":"Data type","score":0.4133495092391968},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4114997386932373},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.4108664095401764},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.40135759115219116},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.37454134225845337},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3714839518070221}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7061156034469604},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.6106743216514587},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5500137805938721},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.45330557227134705},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.4133495092391968},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4114997386932373},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.4108664095401764},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40135759115219116},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37454134225845337},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3714839518070221},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3132847.3133051","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3132847.3133051","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 ACM on Conference on Information and Knowledge Management","raw_type":"proceedings-article"},{"id":"pmh:oai:dro.deakin.edu.au:DU:30131353","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401102","display_name":"Own your potential (DEAKIN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149704539","host_organization_name":"Deakin University","host_organization_lineage":["https://openalex.org/I149704539"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"},{"id":"pmh:oai:figshare.com:article/20733229","is_oa":true,"landing_page_url":"https://figshare.com/articles/conference_contribution/Learning_biological_sequence_types_using_the_literature/20733229","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:figshare.com:article/20733229","is_oa":true,"landing_page_url":"https://figshare.com/articles/conference_contribution/Learning_biological_sequence_types_using_the_literature/20733229","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7300000190734863,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G3459733074","display_name":null,"funder_award_id":"DP150101550","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"}],"funders":[{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1482214997","https://openalex.org/W1577798322","https://openalex.org/W1608396526","https://openalex.org/W1973430024","https://openalex.org/W1999822363","https://openalex.org/W2079168273","https://openalex.org/W2119821739","https://openalex.org/W2136542423","https://openalex.org/W2152019382","https://openalex.org/W2153635508","https://openalex.org/W2580489895","https://openalex.org/W2950687658","https://openalex.org/W4240913316","https://openalex.org/W6628905179"],"related_works":["https://openalex.org/W2125652721","https://openalex.org/W1540371141","https://openalex.org/W4231274751","https://openalex.org/W1549363203","https://openalex.org/W2154063878","https://openalex.org/W2556012038","https://openalex.org/W1489772951","https://openalex.org/W1538046993","https://openalex.org/W3082059448","https://openalex.org/W4313640622"],"abstract_inverted_index":{"We":[0,119],"explore":[1],"in":[2,12,30,37,161],"this":[3,70,88,151,189],"paper":[4],"automatic":[5,91],"biological":[6,13],"sequence":[7,14,17,28,46,92,103,141,192],"type":[8,18,47,93,193],"classification":[9,117,196],"for":[10,115,150,210],"records":[11,104],"databases.":[15],"The":[16,143,168],"attribute":[19,48],"provides":[20],"important":[21],"information":[22],"about":[23],"the":[24,45,83,97,116,148,174,180],"nature":[25],"of":[26,75,90,99,109,123,138,191,201],"a":[27,31,51,121,128,134],"represented":[29],"record,":[32],"and":[33,55,66,126,203],"is":[34,49,58,153,182],"often":[35],"used":[36,209],"search":[38],"to":[39,60,102,132,157,187],"filter":[40],"out":[41],"irrelevant":[42],"sequences.":[43],"However,":[44],"generally":[50],"non-mandatory":[52],"free-text":[53],"field,":[54],"thus":[56],"it":[57],"subject":[59],"many":[61],"errors":[62],"including":[63],"typos,":[64],"mis-assignment,":[65],"non-assignment.":[67],"In":[68],"GenBank,":[69],"problem":[71,89,190],"concerns":[72],"roughly":[73],"18%":[74],"records,":[76],"an":[77,106,184,199],"alarming":[78],"number":[79],"that":[80,111,154,179],"should":[81],"worry":[82],"biocuration":[84],"community.":[85],"To":[86],"address":[87,188],"classification,":[94],"we":[95,170],"propose":[96],"use":[98],"literature":[100,149,181],"associated":[101],"as":[105],"external":[107],"source":[108],"knowledge":[110],"can":[112],"be":[113,158],"leveraged":[114],"task.":[118],"define":[120],"set":[122],"literature-based":[124],"features":[125],"train":[127],"machine":[129],"learning":[130],"algorithm":[131],"classify":[133],"record":[135],"into":[136],"one":[137],"six":[139],"primary":[140],"types.":[142],"main":[144],"intuition":[145],"behind":[146],"using":[147],"task":[152],"sequences":[155],"appear":[156],"discussed":[159],"differently":[160],"scientific":[162],"articles,":[163],"depending":[164],"on":[165,173],"their":[166],"type.":[167],"experiments":[169],"have":[171],"conducted":[172],"PubMed":[175],"Central":[176],"collection":[177],"show":[178],"indeed":[183],"effective":[185],"way":[186],"classification.":[194],"Our":[195],"method":[197],"reached":[198],"accuracy":[200],"92.7%,":[202],"substantially":[204],"outperformed":[205],"two":[206],"baseline":[207],"approaches":[208],"comparison.":[211]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
