{"id":"https://openalex.org/W2887784849","doi":"https://doi.org/10.1109/isit.2018.8437507","title":"Evolution of N-Gram Frequencies Under Duplication and Substitution Mutations","display_name":"Evolution of N-Gram Frequencies Under Duplication and Substitution Mutations","publication_year":2018,"publication_date":"2018-06-01","ids":{"openalex":"https://openalex.org/W2887784849","doi":"https://doi.org/10.1109/isit.2018.8437507","mag":"2887784849"},"language":"en","primary_location":{"id":"doi:10.1109/isit.2018.8437507","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isit.2018.8437507","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Symposium on Information Theory (ISIT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086990183","display_name":"Hao Lou","orcid":"https://orcid.org/0000-0002-6133-2987"},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hao Lou","raw_affiliation_strings":["Electrical and Computer Engineering, University of Virginia, Charlottesville, VA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, University of Virginia, Charlottesville, VA, USA","institution_ids":["https://openalex.org/I51556381"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054043026","display_name":"Moshe Schwartz","orcid":"https://orcid.org/0000-0002-1449-0026"},"institutions":[{"id":"https://openalex.org/I124227911","display_name":"Ben-Gurion University of the Negev","ror":"https://ror.org/05tkyf982","country_code":"IL","type":"education","lineage":["https://openalex.org/I124227911"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Moshe Schwartz","raw_affiliation_strings":["Electrical and Computer Engineering, Ben-Gurion University of the Negev, Beer Sheva, Israel"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, Ben-Gurion University of the Negev, Beer Sheva, Israel","institution_ids":["https://openalex.org/I124227911"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011621481","display_name":"Farzad Farnoud","orcid":"https://orcid.org/0000-0002-8684-4487"},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Farzad Farnoud Hassanzadeh","raw_affiliation_strings":["Electrical and Computer Engineering, University of Virginia, Charlottesville, VA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, University of Virginia, Charlottesville, VA, USA","institution_ids":["https://openalex.org/I51556381"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3516,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.85924497,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"2246","last_page":"2250"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4952518045902252},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4907782971858978},{"id":"https://openalex.org/keywords/substitution","display_name":"Substitution (logic)","score":0.48044702410697937},{"id":"https://openalex.org/keywords/tandem-exon-duplication","display_name":"Tandem exon duplication","score":0.4768192172050476},{"id":"https://openalex.org/keywords/tandem-repeat","display_name":"Tandem repeat","score":0.449907124042511},{"id":"https://openalex.org/keywords/gram","display_name":"Gram","score":0.44730210304260254},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4238559901714325},{"id":"https://openalex.org/keywords/gene-duplication","display_name":"Gene duplication","score":0.42264309525489807},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.3580660820007324},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.32579177618026733},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.23690876364707947},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.2048376202583313},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.1792210042476654},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08313402533531189}],"concepts":[{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4952518045902252},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4907782971858978},{"id":"https://openalex.org/C2778220771","wikidata":"https://www.wikidata.org/wiki/Q1522579","display_name":"Substitution (logic)","level":2,"score":0.48044702410697937},{"id":"https://openalex.org/C156446351","wikidata":"https://www.wikidata.org/wiki/Q7682435","display_name":"Tandem exon duplication","level":4,"score":0.4768192172050476},{"id":"https://openalex.org/C27149982","wikidata":"https://www.wikidata.org/wiki/Q3456514","display_name":"Tandem repeat","level":4,"score":0.449907124042511},{"id":"https://openalex.org/C161369605","wikidata":"https://www.wikidata.org/wiki/Q41803","display_name":"Gram","level":3,"score":0.44730210304260254},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4238559901714325},{"id":"https://openalex.org/C7602840","wikidata":"https://www.wikidata.org/wiki/Q746284","display_name":"Gene duplication","level":3,"score":0.42264309525489807},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.3580660820007324},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.32579177618026733},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.23690876364707947},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.2048376202583313},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.1792210042476654},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08313402533531189},{"id":"https://openalex.org/C523546767","wikidata":"https://www.wikidata.org/wiki/Q10876","display_name":"Bacteria","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isit.2018.8437507","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isit.2018.8437507","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Symposium on Information Theory (ISIT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1908746084","https://openalex.org/W2012120492","https://openalex.org/W2065247540","https://openalex.org/W2099747733","https://openalex.org/W2111921000","https://openalex.org/W2124871568","https://openalex.org/W2595770559","https://openalex.org/W2738842617","https://openalex.org/W2798579099","https://openalex.org/W2964172142","https://openalex.org/W3021059754","https://openalex.org/W3034779354","https://openalex.org/W6820564557"],"related_works":["https://openalex.org/W1984349060","https://openalex.org/W3185201469","https://openalex.org/W1883409887","https://openalex.org/W2106870860","https://openalex.org/W2120250745","https://openalex.org/W1978469902","https://openalex.org/W3142592327","https://openalex.org/W20477621","https://openalex.org/W84307880","https://openalex.org/W2362053680"],"abstract_inverted_index":{"The":[0],"driving":[1],"force":[2],"behind":[3],"the":[4,23,52,57,99,114,117],"generation":[5],"of":[6,22,32,37,46,59,78,98,101,116,119,143],"biological":[7,41],"sequences":[8,15],"are":[9,69,95],"genomic":[10],"mutations":[11],"that":[12,26,128],"shape":[13],"these":[14,60,147],"throughout":[16],"their":[17],"evolutionary":[18],"history.":[19],"An":[20],"understanding":[21],"statistical":[24],"properties":[25],"result":[27],"from":[28],"mutation":[29],"processes":[30],"is":[31],"value":[33],"in":[34,89,122],"a":[35,86,106,135,141,158],"variety":[36],"tasks":[38],"related":[39],"to":[40,56,134],"sequence":[42],"data,":[43],"e.g.,":[44],"estimation":[45],"model":[47,108,144],"parameters":[48],"and":[49,66,82],"compression.":[50],"At":[51],"same":[53],"time,":[54],"due":[55],"complexity":[58],"processes,":[61],"designing":[62],"tractable":[63],"stochastic":[64,107,111],"models":[65],"analyzing":[67],"them":[68],"challenging.":[70],"In":[71,154],"this":[72],"paper,":[73],"we":[74,126,138,156],"study":[75,113],"two":[76],"types":[77],"mutations,":[79],"tandem":[80,91],"duplication":[81],"substitution.":[83],"These":[84],"play":[85],"critical":[87],"role":[88],"forming":[90],"repeat":[92],"regions,":[93],"which":[94,137],"common":[96],"features":[97],"genome":[100],"many":[102],"organisms.":[103],"We":[104],"provide":[105],"and,":[109],"via":[110],"approximation,":[112],"behavior":[115],"frequencies":[118,130],"N-":[120],"grams":[121],"resulting":[123],"sequences.":[124],"Specifically,":[125],"show":[127],"N-gram":[129],"converge":[131],"almost":[132],"surely":[133],"set":[136],"identify":[139],"as":[140],"function":[142],"parameters.":[145],"From":[146],"frequencies,":[148],"other":[149],"statistics":[150],"can":[151],"be":[152],"derived.":[153],"particular,":[155],"present":[157],"method":[159],"for":[160],"finding":[161],"upper":[162],"bounds":[163],"on":[164],"entropy.":[165]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
