{"id":"https://openalex.org/W7162571653","doi":"https://doi.org/10.48550/arxiv.2605.26738","title":"KARMA: Karma-Aligned Reward Model Adaptation","display_name":"KARMA: Karma-Aligned Reward Model Adaptation","publication_year":2026,"publication_date":"2026-05-26","ids":{"openalex":"https://openalex.org/W7162571653","doi":"https://doi.org/10.48550/arxiv.2605.26738"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.26738","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.26738","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.26738","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137092514","display_name":"Jared Scott","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Scott, Jared","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5137145887","display_name":"Jesse Roberts","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roberts, Jesse","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.7026000022888184,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.7026000022888184,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.07670000195503235,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11519","display_name":"Digital Mental Health Interventions","score":0.050599999725818634,"subfield":{"id":"https://openalex.org/subfields/3202","display_name":"Applied Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/karma","display_name":"Karma","score":0.5896999835968018},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5306000113487244},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4796999990940094},{"id":"https://openalex.org/keywords/downstream","display_name":"Downstream (manufacturing)","score":0.4027999937534332},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.3693999946117401},{"id":"https://openalex.org/keywords/social-learning","display_name":"Social learning","score":0.33169999718666077}],"concepts":[{"id":"https://openalex.org/C547328371","wikidata":"https://www.wikidata.org/wiki/Q132196","display_name":"Karma","level":3,"score":0.5896999835968018},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5712000131607056},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5306000113487244},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.49790000915527344},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4796999990940094},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.4027999937534332},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39660000801086426},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3905999958515167},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C79416737","wikidata":"https://www.wikidata.org/wiki/Q2305519","display_name":"Social learning","level":2,"score":0.33169999718666077},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.3269999921321869},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.31279999017715454},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.2969000041484833},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.29679998755455017},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.2687999904155731},{"id":"https://openalex.org/C130064352","wikidata":"https://www.wikidata.org/wiki/Q853725","display_name":"Social relation","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C117035363","wikidata":"https://www.wikidata.org/wiki/Q3769299","display_name":"Human behavior","level":2,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.26738","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.26738","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.26738","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.26738","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.4967890977859497,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Human":[0],"communication":[1],"depends":[2],"on":[3,48,56,72,97],"implicit":[4],"social":[5,40,131],"signals":[6],"where":[7],"effectiveness":[8],"is":[9,148,172],"shaped":[10],"by":[11,151,182],"tone,":[12],"context,":[13,57],"and":[14,58,125],"conversational":[15,36,98],"norms":[16],"rather":[17,179],"than":[18,180],"semantic":[19],"content":[20],"alone.":[21],"We":[22,113],"introduce":[23],"KARMA":[24,43,118,152],"(Karma-Aligned":[25],"Reward":[26],"Model":[27],"Adaptation),":[28],"a":[29,45,92,101,121],"framework":[30],"for":[31],"LLM":[32],"learning":[33,68],"of":[34,104,117],"context-sensitive":[35],"behavior":[37],"from":[38],"large-scale":[39],"interaction":[41],"data.":[42,133,185],"trains":[44],"reward":[46,82,93,176],"model":[47,83,90,94,123,160],"Reddit":[49,105,166],"conversations":[50],"to":[51,62,69,87,120,129,165],"predict":[52],"response":[53],"valuation":[54],"conditioned":[55],"uses":[59],"this":[60,170],"signal":[61,177],"fine-tune":[63],"language":[64],"models":[65,136],"via":[66],"reinforcement":[67],"improve":[70],"performance":[71],"pragmatics-mediated":[73,139],"tasks.":[74],"Critically,":[75],"we":[76],"find":[77],"that":[78,169],"the":[79,115,130,158,175],"highest":[80],"performing":[81],"does":[84],"not":[85],"lead":[86],"better":[88,110],"downstream":[89,111,122,159],"alignment:":[91],"relying":[95],"exclusively":[96],"context":[99],"was":[100],"worse":[102],"predictor":[103],"karma":[106],"but":[107],"yielded":[108],"substantially":[109],"performance.":[112],"evaluate":[114],"effects":[116],"applied":[119],"with":[124,141],"without":[126],"direct":[127,163],"exposure":[128,164],"media":[132],"The":[134],"resulting":[135],"show":[137],"improved":[138],"behaviors":[140],"largely":[142],"mitigated":[143],"undesirable":[144],"side":[145],"effects.":[146],"Factuality":[147],"consistently":[149],"diminished":[150],"across":[153],"all":[154],"conditions,":[155],"including":[156],"when":[157],"has":[161],"no":[162],"data,":[167],"suggesting":[168],"tension":[171],"embedded":[173],"in":[174],"itself":[178],"introduced":[181],"noisy":[183],"training":[184]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-28T00:00:00"}
