{"id":"https://openalex.org/W4393399301","doi":"https://doi.org/10.48550/arxiv.2403.19851","title":"Localizing Paragraph Memorization in Language Models","display_name":"Localizing Paragraph Memorization in Language Models","publication_year":2024,"publication_date":"2024-03-28","ids":{"openalex":"https://openalex.org/W4393399301","doi":"https://doi.org/10.48550/arxiv.2403.19851"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2403.19851","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.19851","pdf_url":"https://arxiv.org/pdf/2403.19851","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.19851","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090115852","display_name":"Niklas Stoehr","orcid":"https://orcid.org/0000-0003-2867-0236"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Stoehr, Niklas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026198819","display_name":"Mitchell Gordon","orcid":"https://orcid.org/0000-0003-1008-2321"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gordon, Mitchell","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016054994","display_name":"Chiyuan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Chiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5017133373","display_name":"Owen Lewis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lewis, Owen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5090115852"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9246000051498413,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/paragraph","display_name":"Paragraph","score":0.9329505562782288},{"id":"https://openalex.org/keywords/memorization","display_name":"Memorization","score":0.7016244530677795},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.5567227602005005},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5174003839492798},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4106295704841614},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.40714555978775024},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.14684703946113586},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.09761160612106323}],"concepts":[{"id":"https://openalex.org/C2777206241","wikidata":"https://www.wikidata.org/wiki/Q194431","display_name":"Paragraph","level":2,"score":0.9329505562782288},{"id":"https://openalex.org/C30038468","wikidata":"https://www.wikidata.org/wiki/Q4354775","display_name":"Memorization","level":2,"score":0.7016244530677795},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.5567227602005005},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5174003839492798},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4106295704841614},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.40714555978775024},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.14684703946113586},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.09761160612106323}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2403.19851","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.19851","pdf_url":"https://arxiv.org/pdf/2403.19851","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2403.19851","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2403.19851","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2403.19851","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.19851","pdf_url":"https://arxiv.org/pdf/2403.19851","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2377059580","https://openalex.org/W4200355488","https://openalex.org/W127000293","https://openalex.org/W3215892509","https://openalex.org/W2928616779","https://openalex.org/W2412592434","https://openalex.org/W2010523086","https://openalex.org/W4244602709","https://openalex.org/W594987446"],"abstract_inverted_index":{"Can":[0],"we":[1,25,107],"localize":[2,72],"the":[3,59,68,114,117,124,128,141],"weights":[4],"and":[5,14,35,122],"mechanisms":[6],"used":[7],"by":[8,65,119],"a":[9,43,73,102,136],"language":[10],"model":[11,36,51],"to":[12,79,151,155],"memorize":[13],"recite":[15],"entire":[16,142],"paragraphs":[17,41],"of":[18,39,55],"its":[19,91],"training":[20],"data?":[21],"In":[22],"this":[23],"paper,":[24],"show":[26],"that":[27,77,97],"while":[28],"memorization":[29,111],"is":[30,88,112],"spread":[31],"across":[32,113],"multiple":[33],"layers":[34,52],"components,":[37],"gradients":[38,54],"memorized":[40,60,145],"have":[42],"distinguishable":[44],"spatial":[45],"pattern,":[46],"being":[47],"larger":[48],"in":[49,83,101,116,127,135],"lower":[50],"than":[53,157],"non-memorized":[56,158],"examples.":[57],"Moreover,":[58],"examples":[61],"can":[62,138],"be":[63,80],"unlearned":[64],"fine-tuning":[66],"only":[67,149],"high-gradient":[69],"weights.":[70],"We":[71],"low-layer":[74],"attention":[75,92],"head":[76,87],"appears":[78],"especially":[81],"involved":[82],"paragraph":[84],"memorization.":[85],"This":[86],"predominantly":[89],"focusing":[90],"on":[93],"distinctive,":[94],"rare":[95],"tokens":[96,115,121,133],"are":[98,147],"least":[99],"frequent":[100],"corpus-level":[103],"unigram":[104],"distribution.":[105],"Next,":[106],"study":[108],"how":[109],"localized":[110],"prefix":[118,137],"perturbing":[120],"measuring":[123],"caused":[125],"change":[126],"decoding.":[129],"A":[130],"few":[131],"distinctive":[132],"early":[134],"often":[139],"corrupt":[140,156],"continuation.":[143],"Overall,":[144],"continuations":[146],"not":[148],"harder":[150],"unlearn,":[152],"but":[153],"also":[154],"ones.":[159]},"counts_by_year":[],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
