{"id":"https://openalex.org/W4410151876","doi":"https://doi.org/10.1162/tacl_a_00748","title":"Anchored Preference Optimization and Contrastive Revisions: Addressing Underspecification in Alignment","display_name":"Anchored Preference Optimization and Contrastive Revisions: Addressing Underspecification in Alignment","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4410151876","doi":"https://doi.org/10.1162/tacl_a_00748"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00748","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00748","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00748/2522342/tacl_a_00748.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00748/2522342/tacl_a_00748.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031474902","display_name":"Karel D\u2019Oosterlinck","orcid":null},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]},{"id":"https://openalex.org/I4210128771","display_name":"Contextual Change (United States)","ror":"https://ror.org/03bskcm82","country_code":"US","type":"company","lineage":["https://openalex.org/I4210128771"]}],"countries":["BE","US"],"is_corresponding":true,"raw_author_name":"Karel D'Oosterlinck","raw_affiliation_strings":["Contextual AI, USA","Ghent University \u2013 imec, Belgium karel@contextual.ai"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Contextual AI, USA","institution_ids":["https://openalex.org/I4210128771"]},{"raw_affiliation_string":"Ghent University \u2013 imec, Belgium karel@contextual.ai","institution_ids":["https://openalex.org/I32597200"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113112971","display_name":"Winnie Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128771","display_name":"Contextual Change (United States)","ror":"https://ror.org/03bskcm82","country_code":"US","type":"company","lineage":["https://openalex.org/I4210128771"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Winnie Xu","raw_affiliation_strings":["Contextual AI, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Contextual AI, USA","institution_ids":["https://openalex.org/I4210128771"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084742757","display_name":"Chris Develder","orcid":"https://orcid.org/0000-0003-2707-4176"},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Chris Develder","raw_affiliation_strings":["Ghent University \u2013 imec, Belgium"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ghent University \u2013 imec, Belgium","institution_ids":["https://openalex.org/I32597200"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075509168","display_name":"Thomas Demeester","orcid":"https://orcid.org/0000-0002-9901-5768"},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Thomas Demeester","raw_affiliation_strings":["Ghent University \u2013 imec, Belgium"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ghent University \u2013 imec, Belgium","institution_ids":["https://openalex.org/I32597200"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100672985","display_name":"Amanpreet Singh","orcid":"https://orcid.org/0000-0002-3409-9833"},"institutions":[{"id":"https://openalex.org/I4210128771","display_name":"Contextual Change (United States)","ror":"https://ror.org/03bskcm82","country_code":"US","type":"company","lineage":["https://openalex.org/I4210128771"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amanpreet Singh","raw_affiliation_strings":["Contextual AI, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Contextual AI, USA","institution_ids":["https://openalex.org/I4210128771"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042601761","display_name":"Christopher Potts","orcid":"https://orcid.org/0000-0002-7978-6055"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Potts","raw_affiliation_strings":["Stanford University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Stanford University, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016956470","display_name":"Douwe Kiela","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128771","display_name":"Contextual Change (United States)","ror":"https://ror.org/03bskcm82","country_code":"US","type":"company","lineage":["https://openalex.org/I4210128771"]},{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Douwe Kiela","raw_affiliation_strings":["Contextual AI, USA","Stanford University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Contextual AI, USA","institution_ids":["https://openalex.org/I4210128771"]},{"raw_affiliation_string":"Stanford University, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014641065","display_name":"Shikib Mehri","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128771","display_name":"Contextual Change (United States)","ror":"https://ror.org/03bskcm82","country_code":"US","type":"company","lineage":["https://openalex.org/I4210128771"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shikib Mehri","raw_affiliation_strings":["Contextual AI, USA. shikib@contextual.ai"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Contextual AI, USA. shikib@contextual.ai","institution_ids":["https://openalex.org/I4210128771"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5031474902"],"corresponding_institution_ids":["https://openalex.org/I32597200","https://openalex.org/I4210128771"],"apc_list":null,"apc_paid":null,"fwci":7.1901,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.96515017,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"13","issue":null,"first_page":"442","last_page":"460"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10639","display_name":"Advanced Software Engineering Methodologies","score":0.9569000005722046,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/underspecification","display_name":"Underspecification","score":0.9772140383720398},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7849736213684082},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.6591292023658752},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4187087416648865},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3675265312194824},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.36694347858428955},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3614400327205658},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.07394722104072571},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.06810230016708374}],"concepts":[{"id":"https://openalex.org/C2779631151","wikidata":"https://www.wikidata.org/wiki/Q7883767","display_name":"Underspecification","level":2,"score":0.9772140383720398},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7849736213684082},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.6591292023658752},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4187087416648865},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3675265312194824},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.36694347858428955},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3614400327205658},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.07394722104072571},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.06810230016708374},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/tacl_a_00748","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00748","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00748/2522342/tacl_a_00748.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:archive.ugent.be:01JVM1W1R95KK8S5SKPMQVS335","is_oa":true,"landing_page_url":"http://hdl.handle.net/1854/LU-01JVM1W1R95KK8S5SKPMQVS335","pdf_url":"https://biblio.ugent.be/publication/01JVM1W1R95KK8S5SKPMQVS335/file/01JVM1WMXB8H8N787RMRM9VQJ8.pdf","source":{"id":"https://openalex.org/S4306400478","display_name":"Ghent University Academic Bibliography (Ghent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I32597200","host_organization_name":"Ghent University","host_organization_lineage":["https://openalex.org/I32597200"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISSN: 2307-387X","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00748","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00748","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00748/2522342/tacl_a_00748.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321730","display_name":"Fonds Wetenschappelijk Onderzoek","ror":"https://ror.org/03qtxy027"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4410151876.pdf","grobid_xml":"https://content.openalex.org/works/W4410151876.grobid-xml"},"referenced_works_count":79,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W2128984831","https://openalex.org/W2736601468","https://openalex.org/W2794325560","https://openalex.org/W2890894339","https://openalex.org/W2912083425","https://openalex.org/W2946609015","https://openalex.org/W2963339397","https://openalex.org/W2970062726","https://openalex.org/W2998617917","https://openalex.org/W4226278401","https://openalex.org/W4286892945","https://openalex.org/W4287024925","https://openalex.org/W4292779060","https://openalex.org/W4311991106","https://openalex.org/W4377121527","https://openalex.org/W4378771755","https://openalex.org/W4385571099","https://openalex.org/W4385571157","https://openalex.org/W4385571189","https://openalex.org/W4389821477","https://openalex.org/W4390963024","https://openalex.org/W4391555989","https://openalex.org/W4392019816","https://openalex.org/W4393335839","https://openalex.org/W4394591101","https://openalex.org/W4394653480","https://openalex.org/W4394654134","https://openalex.org/W4394708144","https://openalex.org/W4394708837","https://openalex.org/W4396650433","https://openalex.org/W4398859186","https://openalex.org/W4399198515","https://openalex.org/W4399447566","https://openalex.org/W4399597322","https://openalex.org/W4400104408","https://openalex.org/W4401042689","https://openalex.org/W4402466821","https://openalex.org/W4402670121","https://openalex.org/W4402670301","https://openalex.org/W4404782689","https://openalex.org/W4404782810","https://openalex.org/W6739585900","https://openalex.org/W6741002519","https://openalex.org/W6749838110","https://openalex.org/W6755829550","https://openalex.org/W6764256271","https://openalex.org/W6764401283","https://openalex.org/W6778883912","https://openalex.org/W6782879696","https://openalex.org/W6791376898","https://openalex.org/W6800166007","https://openalex.org/W6803096969","https://openalex.org/W6810738896","https://openalex.org/W6846930601","https://openalex.org/W6847753483","https://openalex.org/W6852418670","https://openalex.org/W6852652558","https://openalex.org/W6857200042","https://openalex.org/W6857273126","https://openalex.org/W6859795049","https://openalex.org/W6861084137","https://openalex.org/W6861324011","https://openalex.org/W6862159049","https://openalex.org/W6862631004","https://openalex.org/W6863592052","https://openalex.org/W6864029989","https://openalex.org/W6864260629","https://openalex.org/W6864463556","https://openalex.org/W6864650623","https://openalex.org/W6864842089","https://openalex.org/W6868500021","https://openalex.org/W6869069171","https://openalex.org/W6869311773","https://openalex.org/W6869889705","https://openalex.org/W6869942539","https://openalex.org/W6872431211","https://openalex.org/W6910851958","https://openalex.org/W7073593060"],"related_works":["https://openalex.org/W2048198238","https://openalex.org/W2041829265","https://openalex.org/W1984757285","https://openalex.org/W2048884548","https://openalex.org/W1500924816","https://openalex.org/W4200263774","https://openalex.org/W3097313621","https://openalex.org/W2744370558","https://openalex.org/W2161345614","https://openalex.org/W2058926172"],"abstract_inverted_index":{"Abstract":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"are":[5,51,169],"often":[6],"aligned":[7],"using":[8,108],"contrastive":[9,90],"alignment":[10,25,55,103,113],"objectives":[11,56,114],"and":[12,22,36,53,93,100,112,115,137,167],"preference":[13,40,91],"pair":[14],"datasets.":[15],"The":[16,125],"interaction":[17],"between":[18],"model,":[19,146],"paired":[20],"data,":[21],"objective":[23],"makes":[24],"a":[26,43,83,98],"complicated":[27],"procedure,":[28],"sometimes":[29],"producing":[30],"subpar":[31],"results.":[32],"We":[33,105],"study":[34],"this":[35],"find":[37],"that":[38],"(i)":[39],"data":[41],"gives":[42],"better":[44,59],"learning":[45],"signal":[46],"when":[47,61],"the":[48,67,130,159],"underlying":[49],"responses":[50],"contrastive,":[52],"(ii)":[54],"lead":[57,128],"to":[58,88,129],"performance":[60,132],"they":[62],"specify":[63],"more":[64,89,101],"control":[65],"over":[66],"model":[68],"during":[69],"training.":[70],"Based":[71],"on":[72,148],"these":[73],"insights,":[74],"we":[75],"introduce":[76],"Contrastive":[77],"Learning":[78],"from":[79],"AI":[80],"Revisions":[81],"(CLAIR),":[82],"data-creation":[84],"method":[85],"which":[86,119],"leads":[87],"pairs,":[92],"Anchored":[94],"Preference":[95],"Optimization":[96],"(APO),":[97],"controllable":[99,142],"stable":[102],"objective.":[104],"align":[106],"Llama-3-8B-Instruct":[107,155],"various":[109],"comparable":[110],"datasets":[111,168],"measure":[116],"MixEval-Hard":[117],"scores,":[118],"correlate":[120],"highly":[121],"with":[122,152,161],"human":[123],"judgments.":[124],"CLAIR":[126,150],"preferences":[127,151],"strongest":[131],"out":[133],"of":[134],"all":[135],"datasets,":[136],"APO":[138],"consistently":[139],"outperforms":[140],"less":[141],"objectives.":[143],"Our":[144,165],"best":[145],"trained":[147],"32K":[149],"APO,":[153],"improves":[154],"by":[156,163],"7.65%,":[157],"closing":[158],"gap":[160],"GPT4-turbo":[162],"45%.":[164],"code":[166],"available.":[170]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-06T09:05:17.133730","created_date":"2025-10-10T00:00:00"}
