{"id":"https://openalex.org/W3032615300","doi":"https://doi.org/10.7488/era/389","title":"Flexible neural architectures for sequence modeling","display_name":"Flexible neural architectures for sequence modeling","publication_year":2020,"publication_date":"2020-05-26","ids":{"openalex":"https://openalex.org/W3032615300","doi":"https://doi.org/10.7488/era/389","mag":"3032615300"},"language":"en","primary_location":{"id":"pmh:oai:era.ed.ac.uk:1842/37088","is_oa":true,"landing_page_url":"https://hdl.handle.net/1842/37088","pdf_url":"https://hdl.handle.net/1842/37088","source":{"id":"https://openalex.org/S7407055182","display_name":"ERA","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"PhD Doctor of Philosophy"},"type":"dissertation","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hdl.handle.net/1842/37088","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005921350","display_name":"Benjamin Krause","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Krause, Benjamin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5005921350"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.8331000208854675,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.8331000208854675,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5626003742218018},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47614407539367676},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.43119120597839355},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3488481044769287},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.13548675179481506},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.06598922610282898}],"concepts":[{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5626003742218018},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47614407539367676},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.43119120597839355},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3488481044769287},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.13548675179481506},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.06598922610282898}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:era.ed.ac.uk:1842/37088","is_oa":true,"landing_page_url":"https://hdl.handle.net/1842/37088","pdf_url":"https://hdl.handle.net/1842/37088","source":{"id":"https://openalex.org/S7407055182","display_name":"ERA","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"PhD Doctor of Philosophy"},{"id":"doi:10.7488/era/389","is_oa":true,"landing_page_url":"https://doi.org/10.7488/era/389","pdf_url":null,"source":{"id":"https://openalex.org/S7407050748","display_name":"University of Edinburgh","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:3032615300","is_oa":false,"landing_page_url":"https://era.ed.ac.uk/handle/1842/37088","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:era.ed.ac.uk:1842/37088","is_oa":true,"landing_page_url":"https://hdl.handle.net/1842/37088","pdf_url":"https://hdl.handle.net/1842/37088","source":{"id":"https://openalex.org/S7407055182","display_name":"ERA","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"PhD Doctor of Philosophy"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3032615300.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2520337371","https://openalex.org/W2137362580","https://openalex.org/W2899399039","https://openalex.org/W2920714358","https://openalex.org/W2539726002","https://openalex.org/W1565849731","https://openalex.org/W2550574616","https://openalex.org/W3200711887","https://openalex.org/W3146266578","https://openalex.org/W2556095944","https://openalex.org/W2563044023","https://openalex.org/W3150849232","https://openalex.org/W2963432161","https://openalex.org/W3205149504","https://openalex.org/W2407040763","https://openalex.org/W2043039667","https://openalex.org/W1566053249","https://openalex.org/W1509167562","https://openalex.org/W2132583697","https://openalex.org/W1598434182"],"abstract_inverted_index":{"Auto-regressive":[0],"sequence":[1,15,86,92,191,196,392],"models":[2,43,192,306,338,355],"can":[3,149],"estimate":[4],"the":[5,19,90,194,256,260,318,375,378],"distribution":[6],"of":[7,10,21,31,127,221,235,239,258,277,285,328,377,391],"any":[8],"type":[9],"sequential":[11,209],"data.":[12],"To":[13],"study":[14],"models,":[16],"we":[17,53],"consider":[18],"problem":[20],"language":[22,38,109,182,292,354,381],"modeling,":[23,382],"which":[24,59,81],"entails":[25],"predicting":[26,345],"probability":[27,241],"distributions":[28,242],"over":[29,294],"sequences":[30],"text.":[32],"This":[33],"thesis":[34,227],"improves":[35,111],"on":[36,55,70,352],"previous":[37,176,353],"modeling":[39,110,293,326,393],"approaches":[40,107,350],"by":[41,114,140,356],"giving":[42,357],"additional":[44,223,359],"flexibility":[45,62,333,360],"to":[46,48,63,89,94,143,193,207,243,282,291,310,341,361,363,372,374,387],"adapt":[47,88,362],"their":[49,112,244,364],"inputs.":[50,365],"In":[51],"particular,":[52],"focus":[54],"multiplicative":[56,129],"LSTM":[57,83,164],"(mLSTM),":[58],"has":[60],"added":[61,332],"change":[64],"its":[65,71,141],"recurrent":[66,130,145],"transition":[67,146],"function":[68],"depending":[69],"input":[72,155],"as":[73,161,218,236,267,269,298,300],"compared":[74,162],"with":[75,163,255],"traditional":[76],"LSTM,":[77],"and":[78,121,134,157,250,264,321,367,383],"dynamic":[79,212,230,252,278,335,368],"evaluation,":[80],"helps":[82],"(or":[84],"other":[85],"models)":[87],"recent":[91,195],"history":[93,197],"exploit":[95],"re-occurring":[96,208],"patterns":[97,316],"within":[98],"a":[99,125,128,187,219,237,283,295,388],"sequence.":[100],"We":[101,247],"find":[102],"that":[103,148,229,314,334],"using":[104,201,222],"these":[105,272],"adaptive":[106],"for":[108,152,189],"predictions":[113,160],"helping":[115],"them":[116,340,358],"recover":[117,342],"from":[118,317],"surprising":[119],"tokens":[120],"sequences.":[122,330,347],"mLSTM":[123,137,172,366],"is":[124,138,186,232],"hybrid":[126],"neural":[131,177],"network":[132],"(mRNN)":[133],"an":[135],"LSTM.":[136],"characterized":[139],"ability":[142],"have":[144,384],"functions":[147],"vary":[150],"more":[151],"each":[153],"possible":[154],"token,":[156],"makes":[158],"better":[159,233],"after":[165],"viewing":[166],"unexpected":[167,346],"inputs":[168],"in":[169,289,325,380],"our":[170],"experiments.":[171],"also":[173,248,308],"outperformed":[174],"all":[175],"architectures":[178],"at":[179,198],"character":[180],"level":[181],"modeling.":[183],"Dynamic":[184],"evaluation":[185,213,231,253,279,336,369],"method":[188],"adapting":[190,240],"inference":[199],"time":[200],"gradient":[202],"descent,":[203],"assigning":[204],"higher":[205],"probabilities":[206],"patterns.":[210],"While":[211],"was":[214],"often":[215],"previously":[216],"viewed":[217],"way":[220,238],"training":[224],"data,":[225],"this":[226],"argues":[228],"thought":[234],"own":[245],"predictions.":[246],"explore":[249],"develop":[251],"methods":[254,273],"goals":[257],"achieving":[259],"best":[261],"prediction":[262],"performance":[263],"computational/memory":[265],"efficiency,":[266],"well":[268,299],"understanding":[270],"why":[271],"work.":[274],"Different":[275],"variants":[276],"are":[280,307],"applied":[281],"number":[284],"different":[286],"architectures,":[287],"resulting":[288],"improvements":[290,373],"longer":[296],"contexts,":[297],"polyphonic":[301],"music":[302],"prediction.":[303],"Dynamically":[304],"evaluated":[305],"able":[309],"generate":[311],"conditional":[312],"samples":[313],"repeat":[315],"conditioning":[319],"text,":[320],"achieve":[322],"improved":[323],"generalization":[324],"out":[327],"domain":[329],"The":[331,348],"gives":[337],"allows":[339],"faster":[343],"when":[344],"proposed":[349],"improve":[351],"both":[370],"contributed":[371],"state":[376],"art":[379],"potential":[385],"applications":[386],"wider":[389],"range":[390],"problems.":[394]},"counts_by_year":[],"updated_date":"2026-03-02T08:37:19.008085","created_date":"2025-10-10T00:00:00"}
