{"id":"https://openalex.org/W7140236058","doi":"https://doi.org/10.48550/arxiv.2603.20246","title":"Decoding the decoder: Contextual sequence-to-sequence modeling for intracortical speech decoding","display_name":"Decoding the decoder: Contextual sequence-to-sequence modeling for intracortical speech decoding","publication_year":2026,"publication_date":"2026-03-10","ids":{"openalex":"https://openalex.org/W7140236058","doi":"https://doi.org/10.48550/arxiv.2603.20246"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.20246","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20246","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.20246","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Olak, Michal","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Olak, Michal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Boccato, Tommaso","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Boccato, Tommaso","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Ferrante, Matteo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ferrante, Matteo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.8400999903678894,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.8400999903678894,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10241","display_name":"Functional Brain Connectivity Studies","score":0.02969999983906746,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10465","display_name":"Neurobiology of Language and Bilingualism","score":0.02969999983906746,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.8501999974250793},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.531499981880188},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5116999745368958},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5026999711990356},{"id":"https://openalex.org/keywords/chunking","display_name":"Chunking (psychology)","score":0.5020999908447266},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.44029998779296875},{"id":"https://openalex.org/keywords/neural-decoding","display_name":"Neural decoding","score":0.4156000018119812},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.3864000141620636}],"concepts":[{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.8501999974250793},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7390999794006348},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6906999945640564},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.531499981880188},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5116999745368958},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5026999711990356},{"id":"https://openalex.org/C203357204","wikidata":"https://www.wikidata.org/wiki/Q1089605","display_name":"Chunking (psychology)","level":2,"score":0.5020999908447266},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.44029998779296875},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41659998893737793},{"id":"https://openalex.org/C40743351","wikidata":"https://www.wikidata.org/wiki/Q7002049","display_name":"Neural decoding","level":3,"score":0.4156000018119812},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.3864000141620636},{"id":"https://openalex.org/C130727458","wikidata":"https://www.wikidata.org/wiki/Q1639109","display_name":"Coarticulation","level":3,"score":0.37770000100135803},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.3319999873638153},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3066999912261963},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2802000045776367},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C43617652","wikidata":"https://www.wikidata.org/wiki/Q7575399","display_name":"Speech production","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2671999931335449},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C81444415","wikidata":"https://www.wikidata.org/wiki/Q7243535","display_name":"Priming (agriculture)","level":3,"score":0.25220000743865967}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.20246","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20246","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.20246","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20246","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6145884990692139,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Speech":[0],"brain--computer":[1],"interfaces":[2],"require":[3],"decoders":[4],"that":[5,196,213],"translate":[6],"intracortical":[7,66,208],"activity":[8],"into":[9],"linguistic":[10],"output":[11],"while":[12,161],"remaining":[13],"robust":[14],"to":[15,45,155],"limited":[16],"data":[17],"and":[18,50,76,105,111,138,144,151,182,190,211,227],"day-to-day":[19,82],"variability.":[20],"While":[21],"prior":[22],"high-performing":[23],"systems":[24],"have":[25],"largely":[26],"relied":[27],"on":[28,167],"framewise":[29],"phoneme":[30,72,125,150,189],"decoding":[31,43,62,131,137,153],"combined":[32],"with":[33,97,135,141,170],"downstream":[34],"language":[35],"models,":[36],"it":[37],"remains":[38],"unclear":[39],"what":[40],"contextual":[41,197],"sequence-to-sequence":[42,57,198],"contributes":[44],"sublexical":[46],"neural":[47,222],"readout,":[48],"robustness,":[49],"interpretability.":[51],"We":[52,100],"evaluated":[53],"a":[54,123],"multitask":[55],"Transformer-based":[56],"model":[58,69,121],"for":[59],"attempted":[60],"speech":[61,209,223],"from":[63,207],"area":[64],"6v":[65],"recordings.":[67],"The":[68],"jointly":[70],"predicts":[71],"sequences,":[73,75],"word":[74,152,191],"auxiliary":[77],"acoustic":[78],"features.":[79],"To":[80],"address":[81],"nonstationarity,":[83],"we":[84],"introduced":[85],"the":[86,109,114,119,202],"Neural":[87],"Hammer":[88],"Scalpel":[89],"(NHS)":[90],"calibration":[91],"module,":[92],"which":[93],"combines":[94],"global":[95],"alignment":[96],"feature-wise":[98],"modulation.":[99],"further":[101],"analyzed":[102],"held-out-day":[103,162],"generalization":[104],"attention":[106],"patterns":[107],"in":[108,179],"encoder":[110,180],"decoders.":[112,192],"On":[113],"Willett":[115],"et":[116],"al.":[117],"dataset,":[118],"proposed":[120],"achieved":[122],"state-of-the-art":[124],"error":[126],"rate":[127],"of":[128,185,204],"14.3%.":[129],"Word":[130],"reached":[132],"25.6%":[133],"WER":[134,140],"direct":[136],"19.4%":[139],"candidate":[142],"generation":[143],"rescoring.":[145],"NHS":[146],"substantially":[147],"improved":[148],"both":[149],"relative":[154],"linear":[156],"or":[157],"no":[158],"day-specific":[159],"transform,":[160],"experiments":[163],"showed":[164],"increasing":[165],"degradation":[166],"unseen":[168],"days":[169],"temporal":[171,177],"distance.":[172],"Attention":[173],"visualizations":[174],"revealed":[175],"recurring":[176],"chunking":[178],"representations":[181],"distinct":[183],"use":[184],"these":[186],"segments":[187],"by":[188],"These":[193],"results":[194],"indicate":[195],"modeling":[199],"can":[200,216],"improve":[201],"fidelity":[203],"neural-to-phoneme":[205],"readout":[206],"signals":[210],"suggest":[212],"attention-based":[214],"analyses":[215],"generate":[217],"useful":[218],"hypotheses":[219],"about":[220],"how":[221],"evidence":[224],"is":[225],"segmented":[226],"accumulated":[228],"over":[229],"time.":[230]},"counts_by_year":[],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2026-03-25T00:00:00"}
