{"id":"https://openalex.org/W4402853841","doi":"https://doi.org/10.48550/arxiv.2407.15762","title":"Conditional Language Policy: A General Framework for Steerable Multi-Objective Finetuning","display_name":"Conditional Language Policy: A General Framework for Steerable Multi-Objective Finetuning","publication_year":2024,"publication_date":"2024-07-22","ids":{"openalex":"https://openalex.org/W4402853841","doi":"https://doi.org/10.48550/arxiv.2407.15762"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2407.15762","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.15762","pdf_url":"https://arxiv.org/pdf/2407.15762","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2407.15762","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100722760","display_name":"Kaiwen Wang","orcid":"https://orcid.org/0000-0003-3857-4790"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Kaiwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057958491","display_name":"Rahul Kidambi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kidambi, Rahul","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107580321","display_name":"Ryan Sullivan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sullivan, Ryan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101783911","display_name":"A. Agarwal","orcid":"https://orcid.org/0009-0005-4916-1045"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agarwal, Alekh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015966835","display_name":"Christoph Dann","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dann, Christoph","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046831695","display_name":"Andrea Michi","orcid":"https://orcid.org/0009-0001-4797-3593"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Michi, Andrea","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104224050","display_name":"Marco Gelmi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gelmi, Marco","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107646464","display_name":"Yunxuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yunxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018229381","display_name":"Raghav Gupta","orcid":"https://orcid.org/0000-0002-9546-2632"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gupta, Raghav","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036039474","display_name":"Avinava Dubey","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dubey, Avinava","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057217987","display_name":"Alexandre Ram\u00e9","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ram\u00e9, Alexandre","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087706654","display_name":"Johan Ferret","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ferret, Johan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028486593","display_name":"Geoffrey Cideron","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cideron, Geoffrey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101561297","display_name":"Le Hou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hou, Le","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100537890","display_name":"Hongkun Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Hongkun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009154893","display_name":"Amr Ahmed","orcid":"https://orcid.org/0000-0002-7749-7911"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmed, Amr","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075894805","display_name":"Aranyak Mehta","orcid":"https://orcid.org/0000-0002-6132-4901"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mehta, Aranyak","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079135104","display_name":"L\u00e9onard Hussenot","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hussenot, L\u00e9onard","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089890773","display_name":"Olivier Bachem","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bachem, Olivier","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5107646463","display_name":"Edouard Leurent","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Leurent, Edouard","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":20,"corresponding_author_ids":["https://openalex.org/A5100722760"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12373","display_name":"Linguistic research and analysis","score":0.0934000015258789,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12373","display_name":"Linguistic research and analysis","score":0.0934000015258789,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10034","display_name":"Syntax, Semantics, Linguistic Variation","score":0.08940000087022781,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.08330000191926956,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.513916015625}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.513916015625}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2407.15762","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.15762","pdf_url":"https://arxiv.org/pdf/2407.15762","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2407.15762","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2407.15762","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2407.15762","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.15762","pdf_url":"https://arxiv.org/pdf/2407.15762","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402853841.pdf","grobid_xml":"https://content.openalex.org/works/W4402853841.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Reward-based":[0],"finetuning":[1,46],"is":[2,18],"crucial":[3],"for":[4,45,114],"aligning":[5],"language":[6,22,47,105],"policies":[7],"with":[8],"intended":[9],"behaviors":[10],"(e.g.,":[11],"creativity":[12],"and":[13,32,58,93,109],"safety).":[14],"A":[15],"key":[16],"challenge":[17],"to":[19,83],"develop":[20],"steerable":[21,63,104],"models":[23,48,64,82,106],"that":[24,65,101,107],"trade-off":[25,67],"multiple":[26,50,81],"(conflicting)":[27],"objectives":[28,69],"in":[29],"a":[30,42],"flexible":[31],"efficient":[33],"manner.":[34],"This":[35],"paper":[36],"presents":[37],"Conditional":[38],"Language":[39],"Policy":[40],"(CLP),":[41],"general":[43],"framework":[44],"on":[49,53,95],"objectives.":[51,89],"Building":[52],"techniques":[54],"from":[55],"multi-task":[56],"training":[57,78],"parameter-efficient":[59],"finetuning,":[60],"CLP":[61,102],"learn":[62],"effectively":[66],"conflicting":[68],"at":[70],"inference":[71],"time.":[72],"Notably,":[73],"this":[74],"does":[75],"not":[76],"require":[77],"or":[79],"maintaining":[80],"achieve":[84],"different":[85],"trade-offs":[86],"between":[87],"the":[88,111],"Through":[90],"extensive":[91],"experiments":[92],"ablations":[94],"two":[96],"summarization":[97],"datasets,":[98],"we":[99],"show":[100],"learns":[103],"outperform":[108],"Pareto-dominate":[110],"existing":[112],"approaches":[113],"multi-objective":[115],"finetuning.":[116]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2024-09-26T00:00:00"}
