{"id":"https://openalex.org/W2074526138","doi":"https://doi.org/10.1089/10665270360688020","title":"Monotony of Surprise and Large-Scale Quest for Unusual Words","display_name":"Monotony of Surprise and Large-Scale Quest for Unusual Words","publication_year":2003,"publication_date":"2003-06-01","ids":{"openalex":"https://openalex.org/W2074526138","doi":"https://doi.org/10.1089/10665270360688020","mag":"2074526138","pmid":"https://pubmed.ncbi.nlm.nih.gov/12935329"},"language":"en","primary_location":{"id":"doi:10.1089/10665270360688020","is_oa":false,"landing_page_url":"https://doi.org/10.1089/10665270360688020","pdf_url":null,"source":{"id":"https://openalex.org/S78571599","display_name":"Journal of Computational Biology","issn_l":"1066-5277","issn":["1066-5277","1557-8666"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320443","host_organization_name":"Mary Ann Liebert, Inc.","host_organization_lineage":["https://openalex.org/P4310320443"],"host_organization_lineage_names":["Mary Ann Liebert, Inc."],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computational Biology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029039590","display_name":"Alberto Apostolico","orcid":null},"institutions":[{"id":"https://openalex.org/I138689650","display_name":"University of Padua","ror":"https://ror.org/00240q980","country_code":"IT","type":"education","lineage":["https://openalex.org/I138689650"]},{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["IT","US"],"is_corresponding":true,"raw_author_name":"Alberto Apostolico","raw_affiliation_strings":["Department of Computer Sciences, Purdue University, West Lafayette, IN 47907, and Dipartimento di Ingegneria dell'Informazione, Universit\u00e0 di Padova,          Padova, Italy","Department of Computer Sciences, Purdue University, West Lafayette, IN 47907, and Dipartimento di Ingegneria dell'Informazione, Universit\u00e0 di Padova, Padova, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Computer Sciences, Purdue University, West Lafayette, IN 47907, and Dipartimento di Ingegneria dell'Informazione, Universit\u00e0 di Padova,          Padova, Italy","institution_ids":["https://openalex.org/I219193219","https://openalex.org/I138689650"]},{"raw_affiliation_string":"Department of Computer Sciences, Purdue University, West Lafayette, IN 47907, and Dipartimento di Ingegneria dell'Informazione, Universit\u00e0 di Padova, Padova, Italy","institution_ids":["https://openalex.org/I219193219","https://openalex.org/I138689650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108798662","display_name":"Mary Ellen Bock","orcid":null},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mary Ellen Bock","raw_affiliation_strings":["Department of Statistics, Purdue University, West Lafayette, IN 47907"],"affiliations":[{"raw_affiliation_string":"Department of Statistics, Purdue University, West Lafayette, IN 47907","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042005042","display_name":"Stefano Lonardi","orcid":"https://orcid.org/0000-0002-2696-7274"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stefano Lonardi","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California, Riverside, CA 92521"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California, Riverside, CA 92521","institution_ids":["https://openalex.org/I103635307"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5029039590"],"corresponding_institution_ids":["https://openalex.org/I138689650","https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":11.2361,"has_fulltext":false,"cited_by_count":81,"citation_normalized_percentile":{"value":0.98377605,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"10","issue":"3-4","first_page":"283","last_page":"311"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9811000227928162,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.9501349925994873},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7309257984161377},{"id":"https://openalex.org/keywords/surprise","display_name":"Surprise","score":0.5975998640060425},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5421087145805359},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5354262590408325},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4930104911327362},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4908624291419983},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.448272705078125},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4442680776119232},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4326319098472595},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.42827731370925903},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.375120609998703},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.34939897060394287},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33362534642219543},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1742105484008789},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.15776270627975464}],"concepts":[{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.9501349925994873},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7309257984161377},{"id":"https://openalex.org/C2780343955","wikidata":"https://www.wikidata.org/wiki/Q333173","display_name":"Surprise","level":2,"score":0.5975998640060425},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5421087145805359},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5354262590408325},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4930104911327362},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4908624291419983},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.448272705078125},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4442680776119232},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4326319098472595},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.42827731370925903},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.375120609998703},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34939897060394287},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33362534642219543},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1742105484008789},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.15776270627975464},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008390","descriptor_name":"Markov Chains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008390","descriptor_name":"Markov Chains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008390","descriptor_name":"Markov Chains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016010","descriptor_name":"Binomial Distribution","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016010","descriptor_name":"Binomial Distribution","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016010","descriptor_name":"Binomial Distribution","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":3,"locations":[{"id":"doi:10.1089/10665270360688020","is_oa":false,"landing_page_url":"https://doi.org/10.1089/10665270360688020","pdf_url":null,"source":{"id":"https://openalex.org/S78571599","display_name":"Journal of Computational Biology","issn_l":"1066-5277","issn":["1066-5277","1557-8666"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320443","host_organization_name":"Mary Ann Liebert, Inc.","host_organization_lineage":["https://openalex.org/P4310320443"],"host_organization_lineage_names":["Mary Ann Liebert, Inc."],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computational Biology","raw_type":"journal-article"},{"id":"pmid:12935329","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/12935329","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of computational biology : a journal of computational molecular cell biology","raw_type":null},{"id":"pmh:oai:www.research.unipd.it:11577/1363086","is_oa":false,"landing_page_url":"http://hdl.handle.net/11577/1363086","pdf_url":null,"source":{"id":"https://openalex.org/S4377196283","display_name":"Research Padua  Archive (University of Padua)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I138689650","host_organization_name":"University of Padua","host_organization_lineage":["https://openalex.org/I138689650"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2014459399","https://openalex.org/W2030839740","https://openalex.org/W2053392780","https://openalex.org/W2077299429","https://openalex.org/W2082886749","https://openalex.org/W2088082060","https://openalex.org/W2109973210","https://openalex.org/W2121271225","https://openalex.org/W2525716561","https://openalex.org/W3016218058","https://openalex.org/W4243495532"],"related_works":["https://openalex.org/W4236382845","https://openalex.org/W1583922594","https://openalex.org/W2998448420","https://openalex.org/W1974038726","https://openalex.org/W1953626159","https://openalex.org/W4388712630","https://openalex.org/W1598224733","https://openalex.org/W4304731099","https://openalex.org/W3095240633","https://openalex.org/W75168880"],"abstract_inverted_index":{"The":[0,58],"problem":[1],"of":[2,53,65,119,122,128,147,154,160,166,170],"characterizing":[3],"and":[4,14,33,56,71,74,81,85,105,125,163,175],"detecting":[5],"recurrent":[6],"sequence":[7],"patterns":[8,67],"such":[9,66,148],"as":[10],"substrings":[11,108,172],"or":[12,17,42],"motifs":[13],"related":[15],"associations":[16],"rules":[18],"is":[19],"variously":[20],"pursued":[21],"in":[22,45,50,78,173,178],"order":[23],"to":[24,94,101,112,132],"compress":[25],"data,":[26],"unveil":[27],"structure,":[28],"infer":[29],"succinct":[30],"descriptions,":[31],"extract":[32],"classify":[34],"features,":[35],"etc.":[36],"In":[37,96,138],"molecular":[38],"biology,":[39],"exceptionally":[40],"frequent":[41],"rare":[43],"words":[44],"bio-sequences":[46],"have":[47],"been":[48,110],"implicated":[49],"various":[51,183],"facets":[52],"biological":[54],"function":[55],"structure.":[57],"discovery,":[59],"particularly":[60],"on":[61],"a":[62,113,123,151],"massive":[63],"scale,":[64],"poses":[68],"interesting":[69],"methodological":[70],"algorithmic":[72],"problems":[73],"often":[75],"exposes":[76],"scenarios":[77],"which":[79],"tables":[80],"synopses":[82],"grow":[83],"faster":[84],"bigger":[86],"than":[87],"the":[88,99,117,120,134,158,179],"raw":[89],"sequences":[90],"they":[91],"are":[92],"meant":[93],"encapsulate.":[95],"previous":[97],"study,":[98],"ability":[100],"succinctly":[102],"compute,":[103],"store,":[104],"display":[106],"unusual":[107,171],"has":[109],"linked":[111],"subtle":[114],"interplay":[115],"between":[116],"combinatorics":[118],"subword":[121],"word":[124],"local":[126],"monotonicities":[127,149],"some":[129],"scores":[130],"used":[131],"measure":[133],"departure":[135],"from":[136],"expectation.":[137],"this":[139],"paper,":[140],"we":[141],"carry":[142],"out":[143],"an":[144],"extensive":[145],"analysis":[146],"for":[150],"broader":[152],"variety":[153],"scores.":[155],"This":[156],"supports":[157],"construction":[159],"data":[161],"structures":[162],"algorithms":[164],"capable":[165],"performing":[167],"global":[168],"detection":[169],"time":[174],"space":[176],"linear":[177],"subject":[180],"sequences,":[181],"under":[182],"probabilistic":[184],"models.":[185]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":5}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
