{"id":"https://openalex.org/W4400600710","doi":"https://doi.org/10.48550/arxiv.2407.07279","title":"Towards a theory of learning dynamics in deep state space models","display_name":"Towards a theory of learning dynamics in deep state space models","publication_year":2024,"publication_date":"2024-07-10","ids":{"openalex":"https://openalex.org/W4400600710","doi":"https://doi.org/10.48550/arxiv.2407.07279"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2407.07279","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.07279","pdf_url":"https://arxiv.org/pdf/2407.07279","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2407.07279","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082366089","display_name":"Jakub Sm\u00e9kal","orcid":"https://orcid.org/0000-0003-4989-4968"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sm\u00e9kal, Jakub","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010060188","display_name":"Jimmy T. H. Smith","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Smith, Jimmy T. H.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102738767","display_name":"Michael T. Kleinman","orcid":"https://orcid.org/0000-0003-2724-0066"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kleinman, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071084322","display_name":"Dan Biderman","orcid":"https://orcid.org/0000-0003-2054-8439"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Biderman, Dan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5054079047","display_name":"Scott W. Linderman","orcid":"https://orcid.org/0000-0002-3878-9073"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Linderman, Scott W.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5082366089"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.8130999803543091,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.8130999803543091,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11270","display_name":"Complex Systems and Time Series Analysis","score":0.7249000072479248,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.6712483763694763},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.5491074919700623},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.5107641220092773},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4342820644378662},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4172881841659546},{"id":"https://openalex.org/keywords/statistical-physics","display_name":"Statistical physics","score":0.4125514030456543},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34031370282173157},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.25220900774002075},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19705107808113098},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.08271747827529907}],"concepts":[{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.6712483763694763},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.5491074919700623},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.5107641220092773},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4342820644378662},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4172881841659546},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.4125514030456543},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34031370282173157},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.25220900774002075},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19705107808113098},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.08271747827529907},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2407.07279","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.07279","pdf_url":"https://arxiv.org/pdf/2407.07279","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2407.07279","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2407.07279","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2407.07279","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.07279","pdf_url":"https://arxiv.org/pdf/2407.07279","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1636325709","https://openalex.org/W2724817865","https://openalex.org/W2400992695","https://openalex.org/W2382944939","https://openalex.org/W2771969352","https://openalex.org/W1993066527","https://openalex.org/W4300802590","https://openalex.org/W380379685","https://openalex.org/W1671315843","https://openalex.org/W2934102237"],"abstract_inverted_index":{"State":[0],"space":[1,133],"models":[2,21],"(SSMs)":[3],"have":[4],"shown":[5],"remarkable":[6],"empirical":[7],"performance":[8],"on":[9,62],"many":[10],"long":[11],"sequence":[12],"modeling":[13],"tasks,":[14],"but":[15],"a":[16,79,122,125],"theoretical":[17],"understanding":[18],"of":[19,33,51,87,113,127],"these":[20],"is":[22,121],"still":[23],"lacking.":[24],"In":[25],"this":[26],"work,":[27],"we":[28,77,93],"study":[29,112],"the":[30,49,63,67,85,111],"learning":[31,54,64,128],"dynamics":[32,65,86,129],"linear":[34,89],"SSMs":[35,83,115],"to":[36,110],"understand":[37],"how":[38,95],"covariance":[39],"structure":[40],"in":[41,66,106,130],"data,":[42],"latent":[43,96],"state":[44,97,132],"size,":[45],"and":[46,76,84,102],"initialization":[47],"affect":[48],"evolution":[50],"parameters":[52],"throughout":[53],"with":[55,116],"gradient":[56],"descent.":[57],"We":[58],"show":[59],"that":[60],"focusing":[61],"frequency":[68],"domain":[69],"affords":[70],"analytical":[71],"solutions":[72],"under":[73],"mild":[74],"assumptions,":[75],"establish":[78],"link":[80],"between":[81],"one-dimensional":[82],"deep":[88,114,131],"feed-forward":[90],"networks.":[91],"Finally,":[92],"analyze":[94],"over-parameterization":[98],"affects":[99],"convergence":[100],"time":[101],"describe":[103],"future":[104],"work":[105,120],"extending":[107],"our":[108],"results":[109],"nonlinear":[117],"connections.":[118],"This":[119],"step":[123],"toward":[124],"theory":[126],"models.":[134]},"counts_by_year":[],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
