{"id":"https://openalex.org/W4288043168","doi":"https://doi.org/10.48550/arxiv.2207.11240","title":"Discrete Key-Value Bottleneck","display_name":"Discrete Key-Value Bottleneck","publication_year":2022,"publication_date":"2022-07-22","ids":{"openalex":"https://openalex.org/W4288043168","doi":"https://doi.org/10.48550/arxiv.2207.11240"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2207.11240","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.11240","pdf_url":"https://arxiv.org/pdf/2207.11240","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2207.11240","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012479449","display_name":"Frederik Tr\u00e4uble","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tr\u00e4uble, Frederik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016769717","display_name":"Anirudh Goyal","orcid":"https://orcid.org/0000-0002-4080-1940"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goyal, Anirudh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037769450","display_name":"Nasim Rahaman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rahaman, Nasim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047726287","display_name":"Michael C. Mozer","orcid":"https://orcid.org/0000-0002-9654-0575"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mozer, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003184366","display_name":"Kenji Kawaguchi","orcid":"https://orcid.org/0000-0002-5361-9793"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kawaguchi, Kenji","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086198262","display_name":"Yoshua Bengio","orcid":"https://orcid.org/0000-0002-9322-3515"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bengio, Yoshua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5044005697","display_name":"Bernhard Sch\u00f6lkopf","orcid":"https://orcid.org/0000-0002-8177-0925"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sch\u00f6lkopf, Bernhard","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5012479449"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.8485460877418518},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7530031204223633},{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.6682913303375244},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.643762469291687},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6378764510154724},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5757017135620117},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5743753910064697},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.559298038482666},{"id":"https://openalex.org/keywords/information-bottleneck-method","display_name":"Information bottleneck method","score":0.5528442859649658},{"id":"https://openalex.org/keywords/data-stream-mining","display_name":"Data stream mining","score":0.5483468770980835},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5198253393173218},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5028244853019714},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.47982025146484375},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.47741377353668213},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4273764193058014},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.14074426889419556}],"concepts":[{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.8485460877418518},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7530031204223633},{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.6682913303375244},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.643762469291687},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6378764510154724},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5757017135620117},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5743753910064697},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.559298038482666},{"id":"https://openalex.org/C60008888","wikidata":"https://www.wikidata.org/wiki/Q6031013","display_name":"Information bottleneck method","level":3,"score":0.5528442859649658},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.5483468770980835},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5198253393173218},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5028244853019714},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47982025146484375},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.47741377353668213},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4273764193058014},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.14074426889419556},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2207.11240","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.11240","pdf_url":"https://arxiv.org/pdf/2207.11240","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2207.11240","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2207.11240","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2207.11240","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.11240","pdf_url":"https://arxiv.org/pdf/2207.11240","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.46000000834465027,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2622284819","https://openalex.org/W1504394672","https://openalex.org/W3089381707","https://openalex.org/W4285254085","https://openalex.org/W3034190530","https://openalex.org/W2741297526","https://openalex.org/W4295728955","https://openalex.org/W3129794609","https://openalex.org/W2949033103","https://openalex.org/W2304083841"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"networks":[2],"perform":[3],"well":[4],"on":[5,42,250],"classification":[6],"tasks":[7],"where":[8],"data":[9,15,23],"streams":[10,24],"are":[11,152],"i.i.d.":[12],"and":[13,75,108,125,148,167,180,203,225],"labeled":[14],"is":[16,63,130,141],"abundant.":[17],"Challenges":[18],"emerge":[19],"with":[20],"non-stationary":[21],"training":[22],"such":[25],"as":[26,65,76],"continual":[27],"learning.":[28],"One":[29],"powerful":[30],"approach":[31],"that":[32,205,227],"has":[33],"addressed":[34],"this":[35,97,251],"challenge":[36],"involves":[37],"pre-training":[38],"of":[39,44,60,69,106,138,172,189,198,210,244],"large":[40,67],"encoders":[41,62],"volumes":[43],"readily":[45],"available":[46],"data,":[47],"followed":[48],"by":[49],"task-specific":[50],"tuning.":[51],"Given":[52],"a":[53,66,77,92,101,122,169,241],"new":[54],"task,":[55],"however,":[56],"updating":[57],"the":[58,83,87,119,128,133,136,139,145,149,155,159,187,190,196,208,211,217,228],"weights":[59,70],"these":[61,173],"challenging":[64,221],"number":[68,171],"needs":[71],"to":[72,95,116,132,143,154,157,194],"be":[73,115],"fine-tuned,":[74],"result,":[78],"they":[79],"forget":[80],"information":[81],"about":[82],"previous":[84],"tasks.":[85],"In":[86],"present":[88],"work,":[89],"we":[90],"propose":[91],"model":[93,163,182,230],"architecture":[94],"address":[96],"issue,":[98],"building":[99],"upon":[100],"discrete":[102,123,191],"bottleneck":[103,193],"containing":[104],"pairs":[105,175],"separate":[107],"learnable":[109],"key-value":[110,174,192],"codes.":[111],"Our":[112],"paradigm":[113],"will":[114],"encode;":[117],"process":[118],"representation":[120],"via":[121],"bottleneck;":[124],"decode.":[126],"Here,":[127],"input":[129],"fed":[131,153],"pre-trained":[134,245],"encoder,":[135],"output":[137],"encoder":[140],"used":[142],"select":[144],"nearest":[146],"keys,":[147],"corresponding":[150],"values":[151],"decoder":[156],"solve":[158],"current":[160],"task.":[161,252],"The":[162],"can":[164],"only":[165],"fetch":[166],"re-use":[168],"sparse":[170],"during":[176],"inference,":[177],"enabling":[178],"localized":[179],"context-dependent":[181],"updates.":[183],"We":[184,214],"theoretically":[185],"investigate":[186],"ability":[188],"minimize":[195],"effect":[197],"learning":[199,223],"under":[200,220],"distribution":[201],"shifts":[202],"show":[204,226],"it":[206],"reduces":[207,237],"complexity":[209],"hypothesis":[212],"class.":[213],"empirically":[215],"verify":[216],"proposed":[218,229],"method":[219],"class-incremental":[222],"scenarios":[224],"-":[231,236],"without":[232],"any":[233],"task":[234],"boundaries":[235],"catastrophic":[238],"forgetting":[239],"across":[240],"wide":[242],"variety":[243],"models,":[246],"outperforming":[247],"relevant":[248],"baselines":[249]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":4}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
