{"id":"https://openalex.org/W7151971277","doi":"https://doi.org/10.48550/arxiv.2604.05517","title":"UniCreative: Unifying Long-form Logic and Short-form Sparkle via Reference-Free Reinforcement Learning","display_name":"UniCreative: Unifying Long-form Logic and Short-form Sparkle via Reference-Free Reinforcement Learning","publication_year":2026,"publication_date":"2026-04-07","ids":{"openalex":"https://openalex.org/W7151971277","doi":"https://doi.org/10.48550/arxiv.2604.05517"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.05517","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05517","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.05517","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101617593","display_name":"Xiaolong Wei","orcid":"https://orcid.org/0000-0002-6905-087X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Xiaolong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102345692","display_name":"Zerun Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Zerun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133216372","display_name":"Simin Niu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Niu, Simin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133151867","display_name":"Xingyu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xingyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133163809","display_name":"Peiying Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Peiying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133190197","display_name":"Changxuan Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Changxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133171123","display_name":"Yuchen Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yuchen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133172627","display_name":"Jicheng Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jicheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124875636","display_name":"Zhejun Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Zhejun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133217230","display_name":"Chong Meng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Chong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133200865","display_name":"Long Xia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Long","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133207658","display_name":"Daiting Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Daiting","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.8047000169754028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.8047000169754028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.027400000020861626,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.02539999969303608,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7249000072479248},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.5613999962806702},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4253000020980835},{"id":"https://openalex.org/keywords/creativity","display_name":"Creativity","score":0.3822000026702881},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.36079999804496765},{"id":"https://openalex.org/keywords/narrative","display_name":"Narrative","score":0.34209999442100525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7634999752044678},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7249000072479248},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5920000076293945},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.5613999962806702},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4253000020980835},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3959999978542328},{"id":"https://openalex.org/C11012388","wikidata":"https://www.wikidata.org/wiki/Q170658","display_name":"Creativity","level":2,"score":0.3822000026702881},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.36079999804496765},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.34209999442100525},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.298799991607666},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.29190000891685486},{"id":"https://openalex.org/C181204326","wikidata":"https://www.wikidata.org/wiki/Q7239820","display_name":"Preference learning","level":3,"score":0.2678000032901764},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.2605000138282776},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2574999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.05517","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05517","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.05517","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05517","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.8247503042221069,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"A":[0],"fundamental":[1],"challenge":[2],"in":[3,7,16,23],"creative":[4],"writing":[5,140],"lies":[6],"reconciling":[8],"the":[9,150,167],"inherent":[10],"tension":[11],"between":[12,156],"maintaining":[13],"global":[14],"coherence":[15],"long-form":[17],"narratives":[18],"and":[19,49,59,114,120,161],"preserving":[20],"local":[21],"expressiveness":[22],"short-form":[24,33],"texts.":[25],"While":[26],"long-context":[27],"generation":[28],"necessitates":[29],"explicit":[30],"macroscopic":[31],"planning,":[32],"creativity":[34],"often":[35],"demands":[36],"spontaneous,":[37],"constraint-free":[38],"expression.":[39],"Existing":[40],"alignment":[41,172],"paradigms,":[42],"however,":[43],"typically":[44],"employ":[45],"static":[46],"reward":[47,82],"signals":[48],"rely":[50],"heavily":[51],"on":[52],"high-quality":[53],"supervised":[54,118],"data,":[55],"which":[56],"is":[57],"costly":[58],"difficult":[60],"to":[61,89,153],"scale.":[62],"To":[63],"address":[64],"this,":[65],"we":[66,97],"propose":[67,98],"\\textbf{UniCreative},":[68],"a":[69,100],"unified":[70],"reference-free":[71],"reinforcement":[72],"learning":[73],"framework.":[74],"We":[75],"first":[76],"introduce":[77],"\\textbf{AC-GenRM},":[78],"an":[79,146],"adaptive":[80],"constraint-aware":[81],"model":[83,151],"that":[84,104,126],"dynamically":[85],"synthesizes":[86],"query-specific":[87],"criteria":[88],"provide":[90],"fine-grained":[91],"preference":[92],"judgments.":[93],"Leveraging":[94],"these":[95],"signals,":[96],"\\textbf{ACPO},":[99],"policy":[101],"optimization":[102],"algorithm":[103],"aligns":[105,128],"models":[106],"with":[107,130],"human":[108],"preferences":[109],"across":[110,138],"both":[111],"content":[112],"quality":[113],"structural":[115],"paradigms":[116],"without":[117],"fine-tuning":[119],"ground-truth":[121],"references.":[122],"Empirical":[123],"results":[124],"demonstrate":[125],"AC-GenRM":[127],"closely":[129],"expert":[131],"evaluations,":[132],"while":[133],"ACPO":[134],"significantly":[135],"enhances":[136],"performance":[137],"diverse":[139],"tasks.":[141],"Crucially,":[142],"our":[143,170],"analysis":[144],"reveals":[145],"emergent":[147],"meta-cognitive":[148],"ability:":[149],"learns":[152],"autonomously":[154],"differentiate":[155],"tasks":[157],"requiring":[158],"rigorous":[159],"planning":[160],"those":[162],"favoring":[163],"direct":[164,171],"generation,":[165],"validating":[166],"effectiveness":[168],"of":[169],"approach.":[173]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-09T00:00:00"}
