{"id":"https://openalex.org/W4396780921","doi":"https://doi.org/10.48550/arxiv.2405.01964","title":"Position: Understanding LLMs Requires More Than Statistical Generalization","display_name":"Position: Understanding LLMs Requires More Than Statistical Generalization","publication_year":2024,"publication_date":"2024-05-03","ids":{"openalex":"https://openalex.org/W4396780921","doi":"https://doi.org/10.48550/arxiv.2405.01964"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2405.01964","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.01964","pdf_url":"https://arxiv.org/pdf/2405.01964","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2405.01964","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034660028","display_name":"Patrik Reizinger","orcid":"https://orcid.org/0000-0001-9861-0293"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Reizinger, Patrik","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027803852","display_name":"Szilvia Ujv\u00e1ry","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ujv\u00e1ry, Szilvia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109951718","display_name":"Anna M\u00e9sz\u00e1ros","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"M\u00e9sz\u00e1ros, Anna","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044975358","display_name":"Anna Kerekes","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kerekes, Anna","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086801305","display_name":"Wieland Brendel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brendel, Wieland","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5052054395","display_name":"Ferenc Husz\u00e1r","orcid":"https://orcid.org/0000-0002-4988-1430"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Husz\u00e1r, Ferenc","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.2492000013589859,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.2492000013589859,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7707871198654175},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.37849634885787964},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.26402539014816284}],"concepts":[{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7707871198654175},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.37849634885787964},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.26402539014816284},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2405.01964","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.01964","pdf_url":"https://arxiv.org/pdf/2405.01964","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2405.01964","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2405.01964","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2405.01964","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.01964","pdf_url":"https://arxiv.org/pdf/2405.01964","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5415233182","display_name":null,"funder_award_id":"01IS18039A","funder_id":"https://openalex.org/F4320321114","funder_display_name":"Bundesministerium f\u00fcr Bildung und Forschung"},{"id":"https://openalex.org/G7473225590","display_name":null,"funder_award_id":"INST 37/1057-1 FUGG","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"},{"id":"https://openalex.org/G7907426250","display_name":null,"funder_award_id":"390727645","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"},{"id":"https://openalex.org/G8915364236","display_name":null,"funder_award_id":"2064/1","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F4320317967","display_name":"International Max Planck Research School for Environmental, Cellular and Molecular Microbiology","ror":null},{"id":"https://openalex.org/F4320319740","display_name":"Good Ventures Foundation","ror":null},{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"},{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"},{"id":"https://openalex.org/F4320327553","display_name":"International Max Planck Research School for Advanced Methods in Process and Systems Engineering","ror":null}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4396780921.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3162204513","https://openalex.org/W2371138613","https://openalex.org/W2048963458","https://openalex.org/W43109613","https://openalex.org/W2359952343","https://openalex.org/W2239445980","https://openalex.org/W2080152487","https://openalex.org/W3083152911","https://openalex.org/W3022347918","https://openalex.org/W4200527723"],"abstract_inverted_index":{"The":[0],"last":[1],"decade":[2],"has":[3,114],"seen":[4],"blossoming":[5],"research":[6,144],"in":[7,22,32],"deep":[8,16],"learning":[9,17],"theory":[10],"attempting":[11],"to":[12],"answer,":[13],"\"Why":[14],"does":[15],"generalize?\"":[18],"A":[19],"powerful":[20],"shift":[21,44],"perspective":[23,43],"precipitated":[24],"this":[25,37],"progress:":[26],"the":[27,33,50,74,122,129,137],"study":[28],"of":[29,49,53,59,124,132,139],"overparametrized":[30],"models":[31,79,83],"interpolation":[34],"regime.":[35],"In":[36],"paper,":[38],"we":[39],"argue":[40],"that":[41,76],"another":[42],"is":[45],"due,":[46],"since":[47],"some":[48],"desirable":[51],"qualities":[52],"LLMs":[54],"are":[55,80],"not":[56],"a":[57,65],"consequence":[58],"good":[60],"statistical":[61],"generalization":[62,149],"and":[63,108,135,152],"require":[64],"separate":[66],"theoretical":[67],"explanation.":[68],"Our":[69],"core":[70],"argument":[71],"relies":[72],"on":[73,147],"observation":[75],"AR":[77],"probabilistic":[78],"inherently":[81],"non-identifiable:":[82],"zero":[84],"or":[85],"near-zero":[86],"KL":[87],"divergence":[88],"apart":[89],"--":[90,95],"thus,":[91],"equivalent":[92],"test":[93],"loss":[94],"can":[96],"exhibit":[97],"markedly":[98],"different":[99],"behaviors.":[100],"We":[101,141],"support":[102],"our":[103],"position":[104],"with":[105],"mathematical":[106],"examples":[107],"empirical":[109],"observations,":[110],"illustrating":[111],"why":[112],"non-identifiability":[113,123,131,138],"practical":[115],"relevance":[116],"through":[117],"three":[118],"case":[119],"studies:":[120],"(1)":[121],"zero-shot":[125],"rule":[126],"extrapolation;":[127],"(2)":[128],"approximate":[130],"in-context":[133],"learning;":[134],"(3)":[136],"fine-tunability.":[140],"review":[142],"promising":[143],"directions":[145],"focusing":[146],"LLM-relevant":[148],"measures,":[150],"transferability,":[151],"inductive":[153],"biases.":[154]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2024-05-10T00:00:00"}
