{"id":"https://openalex.org/W4412888808","doi":"https://doi.org/10.18653/v1/2025.findings-acl.90","title":"Systematic Generalization in Language Models Scales with Information Entropy","display_name":"Systematic Generalization in Language Models Scales with Information Entropy","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412888808","doi":"https://doi.org/10.18653/v1/2025.findings-acl.90"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-acl.90","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.90","pdf_url":"https://aclanthology.org/2025.findings-acl.90.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-acl.90.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076751435","display_name":"Sondre Wold","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107233","display_name":"Language Science (South Korea)","ror":"https://ror.org/01h9v1373","country_code":"KR","type":"company","lineage":["https://openalex.org/I4210107233"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sondre Wold","raw_affiliation_strings":["Language Technology Group University of Oslo"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Language Technology Group University of Oslo","institution_ids":["https://openalex.org/I4210107233"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026075659","display_name":"Lucas Georges Gabriel Charpentier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lucas Georges Gabriel Charpentier","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5054258348","display_name":"\u00c9tienne Simon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"\u00c9tienne Simon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08631503,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1807","last_page":"1819"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5618000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5618000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6002011895179749},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5735384225845337},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5542383790016174},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5207459926605225},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4827705919742584},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2277052104473114},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07555520534515381}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6002011895179749},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5735384225845337},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5542383790016174},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5207459926605225},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4827705919742584},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2277052104473114},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07555520534515381},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-acl.90","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.90","pdf_url":"https://aclanthology.org/2025.findings-acl.90.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-acl.90","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.90","pdf_url":"https://aclanthology.org/2025.findings-acl.90.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412888808.pdf","grobid_xml":"https://content.openalex.org/works/W4412888808.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W3162204513","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Systematic":[0],"generalization":[1,49,60,103],"remains":[2],"challenging":[3],"for":[4,34,80,134],"current":[5],"language":[6],"models,":[7],"which":[8],"are":[9],"known":[10,26],"to":[11,15,23,42,104],"be":[12,62,117],"both":[13],"sensitive":[14],"semantically":[16],"similar":[17],"permutations":[18],"of":[19,46,58,67,70,92],"the":[20,44,65,68,74,90,98],"input":[21],"and":[22,87,107,123],"struggle":[24],"with":[25,97],"concepts":[27],"presented":[28],"in":[29,73,83],"novel":[30],"contexts.Although":[31],"benchmarks":[32],"exist":[33],"assessing":[35,135],"compositional":[36],"behavior,":[37],"it":[38],"is":[39],"unclear":[40],"how":[41,55],"measure":[43],"difficulty":[45],"a":[47,78,84,132],"systematic":[48,59,102,139],"problem.In":[50],"this":[51],"work,":[52],"we":[53],"show":[54],"one":[56],"aspect":[57],"can":[61,116,129],"described":[63],"by":[64],"entropy":[66,82,115,128],"distribution":[69],"component":[71],"parts":[72],"training":[75],"data.We":[76],"formalize":[77],"framework":[79],"measuring":[81],"sequence-to-sequence":[85],"task":[86],"find":[88],"that":[89,111,124],"performance":[91],"popular":[93],"model":[94],"architectures":[95],"scales":[96],"entropy.Our":[99],"work":[100],"connects":[101],"information":[105],"efficiency,":[106],"our":[108],"results":[109],"indicate":[110],"success":[112,125],"at":[113,126],"high":[114],"achieved":[118],"even":[119],"without":[120],"built-in":[121],"priors,":[122],"low":[127],"serve":[130],"as":[131],"target":[133],"progress":[136],"towards":[137],"robust":[138],"generalization.":[140]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
