{"id":"https://openalex.org/W7160656625","doi":"https://doi.org/10.48550/arxiv.2605.06506","title":"The Frequency Confound in Language-Model Surprisal and Metaphor Novelty","display_name":"The Frequency Confound in Language-Model Surprisal and Metaphor Novelty","publication_year":2026,"publication_date":"2026-05-07","ids":{"openalex":"https://openalex.org/W7160656625","doi":"https://doi.org/10.48550/arxiv.2605.06506"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.06506","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06506","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.06506","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092061417","display_name":"Omar Momen","orcid":"https://orcid.org/0000-0001-8230-1453"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Momen, Omar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135642365","display_name":"Sina Zarrie\u00df","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zarrie\u00df, Sina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10465","display_name":"Neurobiology of Language and Bilingualism","score":0.8307999968528748,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10465","display_name":"Neurobiology of Language and Bilingualism","score":0.8307999968528748,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11148","display_name":"Language, Metaphor, and Cognition","score":0.11940000206232071,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11431","display_name":"Action Observation and Synchronization","score":0.013299999758601189,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.9064000248908997},{"id":"https://openalex.org/keywords/metaphor","display_name":"Metaphor","score":0.8113999962806702},{"id":"https://openalex.org/keywords/mirroring","display_name":"Mirroring","score":0.6834999918937683},{"id":"https://openalex.org/keywords/predictability","display_name":"Predictability","score":0.6108999848365784},{"id":"https://openalex.org/keywords/word-lists-by-frequency","display_name":"Word lists by frequency","score":0.5907999873161316},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.428600013256073},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.34439998865127563},{"id":"https://openalex.org/keywords/conceptual-metaphor","display_name":"Conceptual metaphor","score":0.3312999904155731}],"concepts":[{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.9064000248908997},{"id":"https://openalex.org/C2778311575","wikidata":"https://www.wikidata.org/wiki/Q18534","display_name":"Metaphor","level":2,"score":0.8113999962806702},{"id":"https://openalex.org/C189645446","wikidata":"https://www.wikidata.org/wiki/Q350865","display_name":"Mirroring","level":2,"score":0.6834999918937683},{"id":"https://openalex.org/C197640229","wikidata":"https://www.wikidata.org/wiki/Q2534066","display_name":"Predictability","level":2,"score":0.6108999848365784},{"id":"https://openalex.org/C175293574","wikidata":"https://www.wikidata.org/wiki/Q697133","display_name":"Word lists by frequency","level":3,"score":0.5907999873161316},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.5778999924659729},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.4634999930858612},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.428600013256073},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4269999861717224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39800000190734863},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.375900000333786},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.34439998865127563},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3328000009059906},{"id":"https://openalex.org/C2776402256","wikidata":"https://www.wikidata.org/wiki/Q2633818","display_name":"Conceptual metaphor","level":3,"score":0.3312999904155731},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3160000145435333},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.3160000145435333},{"id":"https://openalex.org/C142853389","wikidata":"https://www.wikidata.org/wiki/Q744778","display_name":"Association (psychology)","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C2779899686","wikidata":"https://www.wikidata.org/wiki/Q571227","display_name":"Implicature","level":3,"score":0.3109000027179718},{"id":"https://openalex.org/C137546455","wikidata":"https://www.wikidata.org/wiki/Q3213474","display_name":"Trigram","level":2,"score":0.2840000092983246},{"id":"https://openalex.org/C89267518","wikidata":"https://www.wikidata.org/wiki/Q179488","display_name":"Psycholinguistics","level":3,"score":0.27570000290870667},{"id":"https://openalex.org/C11693617","wikidata":"https://www.wikidata.org/wiki/Q181839","display_name":"Pragmatics","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.2529999911785126},{"id":"https://openalex.org/C33002781","wikidata":"https://www.wikidata.org/wiki/Q4826617","display_name":"Automaticity","level":3,"score":0.2524000108242035}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.06506","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06506","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.06506","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06506","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6923505067825317,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Language-model":[0],"(LM)":[1],"surprisal":[2,23,46,102],"is":[3,24,61],"widely":[4],"used":[5],"as":[6],"a":[7,62,86],"proxy":[8],"for":[9],"contextual":[10,107],"predictability":[11,108],"and":[12,53,81,112],"has":[13],"been":[14],"reported":[15],"to":[16],"correlate":[17],"with":[18,27,109],"metaphor":[19,35,66,110],"novelty":[20,36,67,111],"judgments.":[21],"However,":[22],"tightly":[25],"intertwined":[26],"lexical":[28,116],"frequency.":[29],"We":[30,44],"explore":[31],"this":[32],"interaction":[33],"on":[34],"ratings":[37],"using":[38],"two":[39],"different":[40],"word":[41,59],"frequency":[42,60,117],"measures.":[43],"analyse":[45],"estimates":[47],"from":[48],"eight":[49],"Pythia":[50],"model":[51],"sizes":[52],"154":[54],"training":[55,71],"checkpoints.":[56],"Across":[57,70],"settings,":[58],"stronger":[63],"predictor":[64],"of":[65],"than":[68],"surprisal.":[69],"stages,":[72],"the":[73,91,98,120],"surprisal--novelty":[74],"association":[75],"peaks":[76],"at":[77],"an":[78],"early":[79],"stage":[80],"then":[82],"falls":[83],"again,":[84],"mirroring":[85],"similarly":[87],"timed":[88],"increase":[89],"in":[90],"surprisal--frequency":[92],"association.":[93],"These":[94],"results":[95],"suggest":[96],"that":[97],"often-reported":[99],"optimal":[100],"LM":[101],"settings":[103],"may":[104,118],"incorrectly":[105],"associate":[106],"processing":[113],"difficulty,":[114],"whereas":[115],"be":[119],"major":[121],"underlying":[122],"factor.":[123]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-09T00:00:00"}
