{"id":"https://openalex.org/W2128766921","doi":"https://doi.org/10.1093/bioinformatics/bts176","title":"Automated gene-model curation using global discriminative learning","display_name":"Automated gene-model curation using global discriminative learning","publication_year":2012,"publication_date":"2012-04-18","ids":{"openalex":"https://openalex.org/W2128766921","doi":"https://doi.org/10.1093/bioinformatics/bts176","mag":"2128766921","pmid":"https://pubmed.ncbi.nlm.nih.gov/22513996"},"language":"en","primary_location":{"id":"doi:10.1093/bioinformatics/bts176","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bioinformatics/bts176","pdf_url":null,"source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081766149","display_name":"Axel Bernal","orcid":"https://orcid.org/0000-0002-7805-0257"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]},{"id":"https://openalex.org/I4210117425","display_name":"Google (Israel)","ror":"https://ror.org/02c20ys54","country_code":"IL","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210117425","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["IL","US"],"is_corresponding":true,"raw_author_name":"Axel Bernal","raw_affiliation_strings":["1 Department of Computer and Information Science, University of Pennsylvania, Philadelphia, PA 19104, USA, 2Department of Electrical Engineering, Technion. Israel Institute of Technology, Haifa 32000, Israel and 3Google Inc. Mountain View, CA, 94043, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"1 Department of Computer and Information Science, University of Pennsylvania, Philadelphia, PA 19104, USA, 2Department of Electrical Engineering, Technion. Israel Institute of Technology, Haifa 32000, Israel and 3Google Inc. Mountain View, CA, 94043, USA","institution_ids":["https://openalex.org/I174306211","https://openalex.org/I1291425158","https://openalex.org/I4210117425","https://openalex.org/I79576946"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006419939","display_name":"Koby Crammer","orcid":"https://orcid.org/0000-0001-8824-5747"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]},{"id":"https://openalex.org/I4210117425","display_name":"Google (Israel)","ror":"https://ror.org/02c20ys54","country_code":"IL","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210117425","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["IL","US"],"is_corresponding":false,"raw_author_name":"Koby Crammer","raw_affiliation_strings":["1 Department of Computer and Information Science, University of Pennsylvania, Philadelphia, PA 19104, USA, 2Department of Electrical Engineering, Technion. Israel Institute of Technology, Haifa 32000, Israel and 3Google Inc. Mountain View, CA, 94043, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"1 Department of Computer and Information Science, University of Pennsylvania, Philadelphia, PA 19104, USA, 2Department of Electrical Engineering, Technion. Israel Institute of Technology, Haifa 32000, Israel and 3Google Inc. Mountain View, CA, 94043, USA","institution_ids":["https://openalex.org/I174306211","https://openalex.org/I1291425158","https://openalex.org/I4210117425","https://openalex.org/I79576946"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044708805","display_name":"Fernando Pereira","orcid":"https://orcid.org/0000-0001-6100-947X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]},{"id":"https://openalex.org/I4210117425","display_name":"Google (Israel)","ror":"https://ror.org/02c20ys54","country_code":"IL","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210117425","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["IL","US"],"is_corresponding":false,"raw_author_name":"Fernando Pereira","raw_affiliation_strings":["1 Department of Computer and Information Science, University of Pennsylvania, Philadelphia, PA 19104, USA, 2Department of Electrical Engineering, Technion. Israel Institute of Technology, Haifa 32000, Israel and 3Google Inc. Mountain View, CA, 94043, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"1 Department of Computer and Information Science, University of Pennsylvania, Philadelphia, PA 19104, USA, 2Department of Electrical Engineering, Technion. Israel Institute of Technology, Haifa 32000, Israel and 3Google Inc. Mountain View, CA, 94043, USA","institution_ids":["https://openalex.org/I174306211","https://openalex.org/I1291425158","https://openalex.org/I4210117425","https://openalex.org/I79576946"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5081766149"],"corresponding_institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I174306211","https://openalex.org/I4210117425","https://openalex.org/I79576946"],"apc_list":{"value":3618,"currency":"USD","value_usd":3618},"apc_paid":null,"fwci":0.5374,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.6639308,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"28","issue":"12","first_page":"1571","last_page":"1578"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.6869999766349792,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.6869999766349792,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.1251000016927719,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.05090000107884407,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.7343795299530029},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6768852472305298},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6331741809844971},{"id":"https://openalex.org/keywords/data-curation","display_name":"Data curation","score":0.5300224423408508},{"id":"https://openalex.org/keywords/gene-annotation","display_name":"Gene Annotation","score":0.46152180433273315},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.43195319175720215},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4128722548484802},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3958781659603119},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.29228395223617554},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.2117428183555603},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.18153369426727295},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.1813250482082367},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.1434541940689087}],"concepts":[{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.7343795299530029},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6768852472305298},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6331741809844971},{"id":"https://openalex.org/C91632574","wikidata":"https://www.wikidata.org/wiki/Q15088675","display_name":"Data curation","level":2,"score":0.5300224423408508},{"id":"https://openalex.org/C2908923196","wikidata":"https://www.wikidata.org/wiki/Q5205742","display_name":"Gene Annotation","level":4,"score":0.46152180433273315},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.43195319175720215},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4128722548484802},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3958781659603119},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.29228395223617554},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.2117428183555603},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.18153369426727295},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.1813250482082367},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.1434541940689087}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008957","descriptor_name":"Models, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008957","descriptor_name":"Models, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008957","descriptor_name":"Models, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D017173","descriptor_name":"Caenorhabditis elegans","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D017173","descriptor_name":"Caenorhabditis elegans","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D017173","descriptor_name":"Caenorhabditis elegans","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D017360","descriptor_name":"Arabidopsis","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D017360","descriptor_name":"Arabidopsis","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D017360","descriptor_name":"Arabidopsis","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D023281","descriptor_name":"Genomics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D023281","descriptor_name":"Genomics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D023281","descriptor_name":"Genomics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1093/bioinformatics/bts176","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bioinformatics/bts176","pdf_url":null,"source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},{"id":"pmid:22513996","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/22513996","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics (Oxford, England)","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6899999976158142,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1534196162","https://openalex.org/W1966771059","https://openalex.org/W2080667183","https://openalex.org/W2086240273","https://openalex.org/W2098937968","https://openalex.org/W2114452806","https://openalex.org/W2114759476","https://openalex.org/W2118300412","https://openalex.org/W2120815702","https://openalex.org/W2121857350","https://openalex.org/W2122537498","https://openalex.org/W2127186883","https://openalex.org/W2129561668","https://openalex.org/W2130395351","https://openalex.org/W2132372375","https://openalex.org/W2141572089","https://openalex.org/W2143801061","https://openalex.org/W2148130205","https://openalex.org/W2156296160","https://openalex.org/W2159512882","https://openalex.org/W2160218441","https://openalex.org/W2247766769","https://openalex.org/W2555947517","https://openalex.org/W3009009611","https://openalex.org/W6641990611","https://openalex.org/W6677276651","https://openalex.org/W6683584131","https://openalex.org/W6730475274"],"related_works":["https://openalex.org/W2901823680","https://openalex.org/W2059565715","https://openalex.org/W2169425537","https://openalex.org/W1999129612","https://openalex.org/W2116418175","https://openalex.org/W2009940763","https://openalex.org/W2126804125","https://openalex.org/W2946410450","https://openalex.org/W4361192415","https://openalex.org/W2137318037"],"abstract_inverted_index":{"MOTIVATION:":[0],"Gene-model":[1],"curation":[2,23,32,99],"creates":[3],"consensus":[4],"gene":[5,52],"models":[6],"by":[7],"combining":[8],"multiple":[9,103],"sources":[10,73,104],"of":[11,66,105,115,118,147,150,185],"protein-coding":[12],"evidence":[13,106,120],"that":[14,89,101],"may":[15],"be":[16,39],"incomplete":[17],"or":[18],"inconsistent.":[19],"To":[20],"date,":[21],"manual":[22,31,60],"still":[24],"produces":[25],"the":[26,43,82,151,164,189],"highest":[27],"quality":[28],"models.":[29],"However,":[30],"is":[33],"too":[34],"slow":[35],"and":[36,68,74,154,174,202],"costly":[37],"to":[38,59,78,125],"completed":[40],"even":[41],"for":[42,169],"most":[44],"important":[45],"organisms.":[46],"In":[47,177],"recent":[48],"years,":[49],"machine-learned":[50],"ensemble":[51,166,192],"predictors":[53,167],"have":[54,92],"become":[55],"a":[56,85,181],"viable":[57],"alternative":[58],"curation.":[61],"Current":[62],"approaches":[63],"make":[64],"use":[65],"signal":[67],"genomic":[69,119],"region":[70],"consistency":[71],"among":[72],"some":[75],"voting":[76],"scheme":[77],"resolve":[79],"conflicts":[80],"in":[81,88,136,194],"evidence.":[83],"As":[84],"further":[86],"step":[87],"direction,":[90],"we":[91],"developed":[93],"eCRAIG":[94,179],"(ensemble":[95],"CRAIG),":[96],"an":[97],"automated":[98],"tool":[100],"combines":[102],"using":[107],"global":[108],"discriminative":[109],"training.":[110],"This":[111],"allows":[112],"efficient":[113],"integration":[114],"different":[116],"types":[117],"with":[121],"complex":[122],"statistical":[123],"dependencies":[124],"maximize":[126],"directly":[127],"annotation":[128,140,156],"accuracy.":[129],"Our":[130],"method":[131],"goes":[132],"beyond":[133],"previous":[134],"work":[135],"integrating":[137],"novel":[138],"non-linear":[139],"agreement":[141],"features,":[142],"as":[143,145],"well":[144],"combinations":[146],"intrinsic":[148],"features":[149],"target":[152],"sequence":[153],"extrinsic":[155],"features.":[157],"RESULTS:":[158],"We":[159],"achieved":[160,180],"significant":[161],"improvements":[162],"over":[163,187],"best":[165,190],"available":[168,206],"Homo":[170],"sapiens,":[171],"Caenorhabditis":[172],"elegans":[173],"Arabidopsis":[175],"thaliana.":[176],"particular,":[178],"relative":[182],"mean":[183],"improvement":[184],"5.1%":[186],"Jigsaw,":[188],"published":[191],"predictor":[193],"all":[195],"our":[196],"experiments.":[197],"AVAILABILITY:":[198],"The":[199],"source":[200],"code":[201],"datasets":[203],"are":[204],"both":[205],"at":[207],"http://www.seas.upenn.edu/abernal/ecraig.tgz.":[208]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
