{"id":"https://openalex.org/W4404347031","doi":"https://doi.org/10.48550/arxiv.2410.23703","title":"OCEAN: Offline Chain-of-thought Evaluation and Alignment in Large Language Models","display_name":"OCEAN: Offline Chain-of-thought Evaluation and Alignment in Large Language Models","publication_year":2024,"publication_date":"2024-10-31","ids":{"openalex":"https://openalex.org/W4404347031","doi":"https://doi.org/10.48550/arxiv.2410.23703"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.23703","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.23703","pdf_url":"https://arxiv.org/pdf/2410.23703","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.23703","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035658564","display_name":"Junda Wu","orcid":"https://orcid.org/0000-0001-6464-7813"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wu, Junda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103223801","display_name":"Xintong Li","orcid":"https://orcid.org/0000-0002-7270-3376"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xintong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100653715","display_name":"Ruoyu Wang","orcid":"https://orcid.org/0000-0003-4623-6724"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Ruoyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374652","display_name":"Yu Xia","orcid":"https://orcid.org/0009-0003-9800-1051"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101493725","display_name":"Yuxin Xiong","orcid":"https://orcid.org/0000-0003-0120-6126"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Yuxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100337987","display_name":"Jianing Wang","orcid":"https://orcid.org/0000-0001-6006-053X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jianing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100303239","display_name":"Tong Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Tong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100441911","display_name":"Xiang Chen","orcid":"https://orcid.org/0000-0002-1180-3891"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049020775","display_name":"Branislav Kveton","orcid":"https://orcid.org/0000-0002-3965-1367"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kveton, Branislav","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037147868","display_name":"Lina Yao","orcid":"https://orcid.org/0000-0003-2235-7556"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Lina","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039500313","display_name":"Jingbo Shang","orcid":"https://orcid.org/0000-0002-7249-4404"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shang, Jingbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5021827617","display_name":"Julian McAuley","orcid":"https://orcid.org/0000-0003-0955-7588"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McAuley, Julian","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5035658564"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5207924246788025},{"id":"https://openalex.org/keywords/chain","display_name":"Chain (unit)","score":0.4262320399284363},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.40348535776138306},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3389712870121002},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.15507811307907104},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07783973217010498},{"id":"https://openalex.org/keywords/astronomy","display_name":"Astronomy","score":0.06845793128013611}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5207924246788025},{"id":"https://openalex.org/C199185054","wikidata":"https://www.wikidata.org/wiki/Q552299","display_name":"Chain (unit)","level":2,"score":0.4262320399284363},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.40348535776138306},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3389712870121002},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.15507811307907104},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07783973217010498},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.06845793128013611}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2410.23703","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.23703","pdf_url":"https://arxiv.org/pdf/2410.23703","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.23703","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.23703","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.23703","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.23703","pdf_url":"https://arxiv.org/pdf/2410.23703","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4404347031.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Offline":[0],"evaluation":[1,26,41,115],"of":[2,27,66,97,100,181,199],"LLMs":[3,36,123],"is":[4],"crucial":[5],"in":[6,15,102,122,253],"understanding":[7],"their":[8,257],"capacities,":[9],"though":[10],"current":[11],"methods":[12],"remain":[13],"underexplored":[14],"existing":[16],"research.":[17],"In":[18],"this":[19],"work,":[20],"we":[21,53,110,144,171,195,218],"focus":[22],"on":[23,38,62,84,176,209],"the":[24,28,39,63,70,103,107,129,138,173,177,182,197,200,213],"offline":[25,45,113],"chain-of-thought":[29,114,120,163,227,241],"capabilities":[30],"and":[31,50,75,80,95,127,141,149,179,190,204],"show":[32],"how":[33],"to":[34,59,69,151,224],"optimize":[35],"based":[37],"proposed":[40,201],"method.":[42],"To":[43,105,136],"enable":[44,221],"feedback":[46,61,81,175],"with":[47,132,244],"rich":[48],"knowledge":[49,55],"reasoning":[51,74,121,139,164,168,184,242],"paths,":[52,165],"use":[54],"graphs":[56],"(e.g.,":[57],"Wikidata5m)":[58],"provide":[60,205],"generated":[64,183],"chain":[65],"thoughts.":[67],"Due":[68],"heterogeneity":[71,140],"between":[72],"LLM":[73,85],"KG":[76,133,147,154,167],"structures,":[77],"direct":[78],"interaction":[79],"from":[82],"KGs":[83],"behavior":[86],"are":[87],"challenging,":[88],"as":[89,124],"they":[90],"require":[91],"accurate":[92],"entity":[93],"linking":[94],"grounding":[96,142],"LLM-generated":[98,162],"chains":[99],"thought":[101],"KG.":[104],"address":[106],"above":[108],"challenge,":[109],"propose":[111,191],"an":[112,125],"framework,":[116],"OCEAN,":[117],"which":[118],"models":[119],"MDP":[126],"evaluate":[128],"policy's":[130],"alignment":[131,180],"preference":[134],"modeling.":[135],"overcome":[137],"problems,":[143],"leverage":[145],"on-policy":[146],"exploration":[148],"RL":[150],"model":[152],"a":[153,206],"policy":[155],"that":[156,233],"generates":[157],"token-level":[158],"likelihood":[159],"distributions":[160],"for":[161,239],"simulating":[166],"preference.":[169],"Then":[170],"incorporate":[172],"knowledge-graph":[174],"validity":[178],"paths":[185,243],"into":[186],"inverse":[187],"propensity":[188],"scores":[189],"KG-IPS":[192,202],"estimator.":[193],"Theoretically,":[194],"prove":[196],"unbiasedness":[198],"estimator":[203],"lower":[207],"bound":[208],"its":[210],"variance.":[211],"With":[212],"off-policy":[214,222],"evaluated":[215],"value":[216],"function,":[217],"can":[219,235],"directly":[220],"optimization":[223],"further":[225],"enhance":[226],"alignment.":[228],"Our":[229],"empirical":[230],"study":[231],"shows":[232],"OCEAN":[234],"be":[236],"efficiently":[237],"optimized":[238],"generating":[240],"higher":[245],"estimated":[246],"values":[247],"without":[248],"affecting":[249],"LLMs'":[250],"general":[251],"abilities":[252],"downstream":[254],"tasks":[255],"or":[256],"internal":[258],"knowledge.":[259]},"counts_by_year":[],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2024-11-14T00:00:00"}
