{"id":"https://openalex.org/W1678180381","doi":"https://doi.org/10.1186/1471-2105-5-131","title":"What can we learn from noncoding regions of similarity between genomes?","display_name":"What can we learn from noncoding regions of similarity between genomes?","publication_year":2004,"publication_date":"2004-09-15","ids":{"openalex":"https://openalex.org/W1678180381","doi":"https://doi.org/10.1186/1471-2105-5-131","mag":"1678180381"},"language":"en","primary_location":{"id":"doi:10.1186/1471-2105-5-131","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-5-131","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-5-131","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-5-131","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113677486","display_name":"Thomas A. Down","orcid":null},"institutions":[{"id":"https://openalex.org/I2802476451","display_name":"Wellcome Sanger Institute","ror":"https://ror.org/05cy4wa09","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I2802476451","https://openalex.org/I87048295"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Thomas A Down","raw_affiliation_strings":["Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SA, UK","Wellcome Trust Sanger Institute, Wellcome trust Genome Campus, Hinxton, Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SA, UK","institution_ids":["https://openalex.org/I2802476451"]},{"raw_affiliation_string":"Wellcome Trust Sanger Institute, Wellcome trust Genome Campus, Hinxton, Cambridge, UK","institution_ids":["https://openalex.org/I2802476451"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007549481","display_name":"Tim Hubbard","orcid":"https://orcid.org/0000-0002-1767-9318"},"institutions":[{"id":"https://openalex.org/I2802476451","display_name":"Wellcome Sanger Institute","ror":"https://ror.org/05cy4wa09","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I2802476451","https://openalex.org/I87048295"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Tim JP Hubbard","raw_affiliation_strings":["Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SA, UK","Wellcome Trust Sanger Institute, Wellcome trust Genome Campus, Hinxton, Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SA, UK","institution_ids":["https://openalex.org/I2802476451"]},{"raw_affiliation_string":"Wellcome Trust Sanger Institute, Wellcome trust Genome Campus, Hinxton, Cambridge, UK","institution_ids":["https://openalex.org/I2802476451"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5113677486"],"corresponding_institution_ids":["https://openalex.org/I2802476451"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":0.7198,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.6638939,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"5","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.754713237285614},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.6708576679229736},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.6303927898406982},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.6033572554588318},{"id":"https://openalex.org/keywords/conserved-sequence","display_name":"Conserved sequence","score":0.5706620216369629},{"id":"https://openalex.org/keywords/coding-region","display_name":"Coding region","score":0.5485778450965881},{"id":"https://openalex.org/keywords/dna-microarray","display_name":"DNA microarray","score":0.5343847274780273},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.496432363986969},{"id":"https://openalex.org/keywords/genomics","display_name":"Genomics","score":0.4777866303920746},{"id":"https://openalex.org/keywords/human-genome","display_name":"Human genome","score":0.4735897183418274},{"id":"https://openalex.org/keywords/gene-prediction","display_name":"Gene prediction","score":0.4708176255226135},{"id":"https://openalex.org/keywords/sequence-alignment","display_name":"Sequence alignment","score":0.4405101537704468},{"id":"https://openalex.org/keywords/gene-expression","display_name":"Gene expression","score":0.19177868962287903},{"id":"https://openalex.org/keywords/peptide-sequence","display_name":"Peptide sequence","score":0.14262831211090088}],"concepts":[{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.754713237285614},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.6708576679229736},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.6303927898406982},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.6033572554588318},{"id":"https://openalex.org/C199216141","wikidata":"https://www.wikidata.org/wiki/Q4995178","display_name":"Conserved sequence","level":4,"score":0.5706620216369629},{"id":"https://openalex.org/C91779695","wikidata":"https://www.wikidata.org/wiki/Q3780824","display_name":"Coding region","level":3,"score":0.5485778450965881},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.5343847274780273},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.496432363986969},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.4777866303920746},{"id":"https://openalex.org/C197077220","wikidata":"https://www.wikidata.org/wiki/Q720988","display_name":"Human genome","level":4,"score":0.4735897183418274},{"id":"https://openalex.org/C105565629","wikidata":"https://www.wikidata.org/wiki/Q1248292","display_name":"Gene prediction","level":4,"score":0.4708176255226135},{"id":"https://openalex.org/C45484198","wikidata":"https://www.wikidata.org/wiki/Q827246","display_name":"Sequence alignment","level":4,"score":0.4405101537704468},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.19177868962287903},{"id":"https://openalex.org/C167625842","wikidata":"https://www.wikidata.org/wiki/Q899763","display_name":"Peptide sequence","level":3,"score":0.14262831211090088}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/1471-2105-5-131","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-5-131","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-5-131","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmh:oai:kclpure.kcl.ac.uk:openaire/0292f10c-9b70-47e0-91e7-b285736a1c19","is_oa":true,"landing_page_url":"https://kclpure.kcl.ac.uk/portal/en/publications/0292f10c-9b70-47e0-91e7-b285736a1c19","pdf_url":null,"source":{"id":"https://openalex.org/S4306400216","display_name":"Research Portal (King's College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I183935753","host_organization_name":"King's College London","host_organization_lineage":["https://openalex.org/I183935753"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Down, T A & Hubbard, T J P 2004, 'What can we learn from noncoding regions of similarity between genomes?', BMC Bioinformatics, vol. 5, 131. https://doi.org/10.1186/1471-2105-5-131","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:doaj.org/article:d745c66d1bff4b46a70591ca38d942a7","is_oa":true,"landing_page_url":"https://doaj.org/article/d745c66d1bff4b46a70591ca38d942a7","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 5, Iss 1, p 131 (2004)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:523850","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/523850","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/1471-2105-5-131","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-5-131","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-5-131","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320311904","display_name":"Wellcome Trust","ror":"https://ror.org/029chgv08"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1678180381.pdf","grobid_xml":"https://content.openalex.org/works/W1678180381.grobid-xml"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W1648445109","https://openalex.org/W1999966058","https://openalex.org/W2003463914","https://openalex.org/W2021868564","https://openalex.org/W2057998009","https://openalex.org/W2098704098","https://openalex.org/W2099784985","https://openalex.org/W2101690350","https://openalex.org/W2103225674","https://openalex.org/W2112814753","https://openalex.org/W2124332179","https://openalex.org/W2131581981","https://openalex.org/W2151464048","https://openalex.org/W2158304452","https://openalex.org/W2166187656","https://openalex.org/W2168909179","https://openalex.org/W4231390238","https://openalex.org/W4242043490","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2181549235","https://openalex.org/W2141847640","https://openalex.org/W2057750912","https://openalex.org/W2122609286","https://openalex.org/W2137491265","https://openalex.org/W2109672343","https://openalex.org/W2077151220","https://openalex.org/W1973727281","https://openalex.org/W2046212479","https://openalex.org/W3209520725"],"abstract_inverted_index":{"In":[0],"addition":[1],"to":[2,23,32,57,73,94,106,137,170],"known":[3],"protein-coding":[4],"genes,":[5],"large":[6],"amounts":[7],"of":[8,39,65,109,126,163,179,195,205],"apparently":[9],"non-coding":[10,66,76,160],"sequence":[11],"are":[12,29,129],"conserved":[13,27,67],"between":[14],"the":[15,40,52,59,75,81,91,107,124,149,153,159,164,171],"human":[16],"and":[17,78,121],"mouse":[18],"genomes.":[19],"It":[20],"seems":[21],"reasonable":[22],"assume":[24],"that":[25,80,100,123,138,148,173],"these":[26],"regions":[28,181],"more":[30],"likely":[31],"contain":[33],"functional":[34,161],"elements":[35],"than":[36],"less-conserved":[37],"portions":[38],"genome.":[41,165],"Here":[42],"we":[43,98],"used":[44],"a":[45,63,176,192,203],"motif-oriented":[46],"machine":[47],"learning":[48],"method":[49,115,133],"based":[50],"on":[51],"Relevance":[53],"Vector":[54],"Machine":[55],"algorithm":[56],"extract":[58],"strongest":[60,154],"signal":[61,151,157],"from":[62],"set":[64,125],"sequences.":[68],"We":[69,112],"successfully":[70],"fitted":[71],"models":[72,93],"reflect":[74],"sequences,":[77],"showed":[79,122],"results":[82,144],"were":[83],"quite":[84],"consistent":[85],"for":[86],"repeated":[87],"training":[88],"runs.":[89],"Using":[90],"learned":[92],"scan":[95],"genomic":[96],"sequence,":[97],"found":[99],"they":[101],"often":[102],"made":[103],"predictions":[104],"close":[105],"start":[108],"annotated":[110],"genes.":[111],"compared":[113],"this":[114,132],"with":[116],"other":[117],"published":[118],"promoter-prediction":[119],"systems,":[120],"promoters":[127],"which":[128,182],"detected":[130,139],"by":[131,140,202],"is":[134,152,200],"substantially":[135],"similar":[136],"existing":[141],"methods.":[142,208],"The":[143],"presented":[145],"here":[146],"indicate":[147],"promoter":[150,180],"single":[155],"motif-based":[156],"in":[158],"fraction":[162],"They":[166],"also":[167],"lend":[168],"support":[169],"belief":[172],"there":[174],"exists":[175],"substantial":[177],"subset":[178,199],"share":[183],"several":[184],"common":[185],"features":[186],"including,":[187],"but":[188],"not":[189],"restricted":[190],"to,":[191],"relative":[193],"abundance":[194],"CpG":[196],"dinucleotides.":[197],"This":[198],"detectable":[201],"variety":[204],"distinct":[206],"computational":[207]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
