{"id":"https://openalex.org/W2144390157","doi":"https://doi.org/10.1145/2093698.2093709","title":"Computational inference of difficult word boundaries in DNA languages","display_name":"Computational inference of difficult word boundaries in DNA languages","publication_year":2011,"publication_date":"2011-10-26","ids":{"openalex":"https://openalex.org/W2144390157","doi":"https://doi.org/10.1145/2093698.2093709","mag":"2144390157"},"language":"en","primary_location":{"id":"doi:10.1145/2093698.2093709","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2093698.2093709","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th International Symposium on Applied Sciences in Biomedical and Communication Technologies","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://mediatum.ub.tum.de/node?id=1507703","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057732779","display_name":"Guy Tsafnat","orcid":"https://orcid.org/0000-0003-4353-2026"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Guy Tsafnat","raw_affiliation_strings":["University of New South Wales, Sydney, Australia","University of New South wales, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]},{"raw_affiliation_string":"University of New South wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057092872","display_name":"Paul Setzermann","orcid":null},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Paul Setzermann","raw_affiliation_strings":["University of New South Wales, Sydney, Australia","University of New South wales, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]},{"raw_affiliation_string":"University of New South wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080471486","display_name":"Sally R. Partridge","orcid":"https://orcid.org/0000-0002-0666-8330"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Sally R. Partridge","raw_affiliation_strings":["University of Sydney, Sydney, Australia","University of Sydney; Sydney; Australia"],"affiliations":[{"raw_affiliation_string":"University of Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I129604602"]},{"raw_affiliation_string":"University of Sydney; Sydney; Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009539507","display_name":"Dominik G. Grimm","orcid":"https://orcid.org/0000-0003-2085-4591"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Dominik Grimm","raw_affiliation_strings":["University of New South Wales, Sydney, Australia","University of New South wales, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]},{"raw_affiliation_string":"University of New South wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5057732779"],"corresponding_institution_ids":["https://openalex.org/I31746571"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.1268796,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7039839029312134},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6727200746536255},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5843332409858704},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5753433704376221},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5748330354690552},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.5633695125579834},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.49761298298835754},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.42524972558021545},{"id":"https://openalex.org/keywords/biomedical-text-mining","display_name":"Biomedical text mining","score":0.4193114638328552},{"id":"https://openalex.org/keywords/dna","display_name":"DNA","score":0.36893221735954285},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.23686498403549194},{"id":"https://openalex.org/keywords/text-mining","display_name":"Text mining","score":0.14892923831939697},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.14739161729812622},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13426271080970764}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7039839029312134},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6727200746536255},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5843332409858704},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5753433704376221},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5748330354690552},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.5633695125579834},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.49761298298835754},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.42524972558021545},{"id":"https://openalex.org/C165141518","wikidata":"https://www.wikidata.org/wiki/Q4915126","display_name":"Biomedical text mining","level":3,"score":0.4193114638328552},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.36893221735954285},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.23686498403549194},{"id":"https://openalex.org/C71472368","wikidata":"https://www.wikidata.org/wiki/Q676880","display_name":"Text mining","level":2,"score":0.14892923831939697},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.14739161729812622},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13426271080970764},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2093698.2093709","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2093698.2093709","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th International Symposium on Applied Sciences in Biomedical and Communication Technologies","raw_type":"proceedings-article"},{"id":"pmh:oai:mediatum.ub.tum.de:node/1507703","is_oa":true,"landing_page_url":"http://mediatum.ub.tum.de/node?id=1507703","pdf_url":null,"source":{"id":"https://openalex.org/S4306400453","display_name":"mediaTUM \u2013 the media and publications repository of the Technical University Munich (Technical University Munich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I62916508","host_organization_name":"Technical University of Munich","host_organization_lineage":["https://openalex.org/I62916508"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:mediatum.ub.tum.de:node/1507703","is_oa":true,"landing_page_url":"http://mediatum.ub.tum.de/node?id=1507703","pdf_url":null,"source":{"id":"https://openalex.org/S4306400453","display_name":"mediaTUM \u2013 the media and publications repository of the Technical University Munich (Technical University Munich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I62916508","host_organization_name":"Technical University of Munich","host_organization_lineage":["https://openalex.org/I62916508"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8600000143051147}],"awards":[],"funders":[{"id":"https://openalex.org/F4320325396","display_name":"NSW Ministry of Health","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W12792382","https://openalex.org/W1558136956","https://openalex.org/W1573962804","https://openalex.org/W1804262888","https://openalex.org/W1821016187","https://openalex.org/W1991133427","https://openalex.org/W2013032282","https://openalex.org/W2015292449","https://openalex.org/W2026967058","https://openalex.org/W2038395250","https://openalex.org/W2055043387","https://openalex.org/W2093205346","https://openalex.org/W2096716552","https://openalex.org/W2099303284","https://openalex.org/W2105199251","https://openalex.org/W2105391395","https://openalex.org/W2113828015","https://openalex.org/W2127320892","https://openalex.org/W2138531167","https://openalex.org/W2140777747","https://openalex.org/W2150390352","https://openalex.org/W2153635508","https://openalex.org/W2166339024","https://openalex.org/W2166480651","https://openalex.org/W2170952441","https://openalex.org/W2172228854","https://openalex.org/W3120421331"],"related_works":["https://openalex.org/W2887358545","https://openalex.org/W4241083716","https://openalex.org/W2914463643","https://openalex.org/W2096951189","https://openalex.org/W3185751515","https://openalex.org/W2055243143","https://openalex.org/W4255692567","https://openalex.org/W3179963059","https://openalex.org/W4321636575","https://openalex.org/W2996519767"],"abstract_inverted_index":{"Many":[0],"applications":[1],"in":[2,42,62,69,74],"molecular":[3,94],"and":[4,11,39,54,60,83,96,153],"systems":[5],"biology":[6],"exploit":[7],"similarities":[8],"between":[9,36],"DNA":[10,37,75,177,207],"languages":[12],"to":[13,23,100,112,128,142,149,226],"make":[14],"predictions":[15],"about":[16],"cell":[17],"function.":[18],"This":[19],"approach":[20],"provides":[21],"structure":[22],"an":[24,165],"otherwise":[25],"monotonous":[26],"sequence":[27],"of":[28,32,116,125,139,179,186,188,199,206],"nucleotides.":[29],"However,":[30],"one":[31],"the":[33,114,126,184,189,197,200],"major":[34],"differences":[35],"sequences":[38],"text":[40],"is":[41],"how":[43],"semantic":[44,72],"units":[45,73,89,219],"(e.g.":[46,76],"words)":[47],"are":[48,56,97],"distinguished":[49],"within":[50],"them.":[51],"Whereas":[52],"words":[53],"sentences":[55],"separated":[57],"by":[58,202],"spaces":[59],"punctuation":[61],"natural":[63],"languages,":[64],"no":[65],"such":[66,117],"markers":[67],"exist":[68],"DNA.":[70],"Some":[71],"genes)":[77],"can":[78,146],"be":[79,147],"identified":[80,217],"relatively":[81,85],"easily":[82],"with":[84,210],"high":[86],"accuracy.":[87],"Other":[88],"may":[90],"have":[91],"less":[92],"known":[93,214],"mechanisms":[95],"therefore":[98],"harder":[99],"identify":[101,129],"accurately.":[102],"In":[103],"this":[104],"paper":[105],"we":[106,167],"discuss":[107],"three":[108],"machine":[109,133,155],"learning":[110,134,156],"methods":[111,135,157,191,201],"elucidate":[113],"boundaries":[115,141],"difficult":[118],"units:":[119],"heuristic":[120],"approaches":[121],"use":[122,168],"hypothesized":[123],"models":[124],"mechanism":[127],"word":[130,140],"boundaries,":[131,152],"supervised":[132],"generalise":[136],"labelled":[137],"examples":[138],"a":[143,159,169],"model":[144,160],"that":[145,175,212,220,223],"used":[148],"detect":[150],"these":[151],"unsupervised":[154],"infer":[158],"from":[161],"unlabeled":[162],"data.":[163],"As":[164],"example,":[166],"bacterial":[170],"transposable":[171],"element":[172],"called":[173],"ISEcp1":[174,211],"moves":[176],"segments":[178,208],"variable":[180],"length.":[181],"We":[182,195,216],"assess":[183],"accuracy":[185],"each":[187],"above":[190],"using":[192],"rediscovery":[193],"experiments.":[194],"demonstrate":[196],"power":[198],"examining":[203],"9":[204],"instances":[205],"associated":[209],"lack":[213],"boundaries.":[215],"6":[218],"include":[221],"genes":[222],"confer":[224],"resistance":[225],"clinically":[227],"important":[228],"antibiotics.":[229]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
