{"id":"https://openalex.org/W7155001052","doi":"https://doi.org/10.48550/arxiv.2604.15602","title":"GroupDPO: Memory efficient Group-wise Direct Preference Optimization","display_name":"GroupDPO: Memory efficient Group-wise Direct Preference Optimization","publication_year":2026,"publication_date":"2026-04-17","ids":{"openalex":"https://openalex.org/W7155001052","doi":"https://doi.org/10.48550/arxiv.2604.15602"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.15602","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15602","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.15602","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134039004","display_name":"Jixuan Leng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Leng, Jixuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134060352","display_name":"Si Si","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Si, Si","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102851341","display_name":"Hsiang\u2010Fu Yu","orcid":"https://orcid.org/0000-0001-5549-7091"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Hsiang-Fu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134030259","display_name":"Vinod Raman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raman, Vinod","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5063459703","display_name":"Inderjit S. Dhillon","orcid":"https://orcid.org/0000-0002-2759-1416"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dhillon, Inderjit S.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.3301999866962433,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.3301999866962433,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.2565000057220459,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.0348999984562397,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.7452999949455261},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6498000025749207},{"id":"https://openalex.org/keywords/decoupling","display_name":"Decoupling (probability)","score":0.4830000102519989},{"id":"https://openalex.org/keywords/preference-learning","display_name":"Preference learning","score":0.45559999346733093},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.4018999934196472},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.38609999418258667}],"concepts":[{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.7452999949455261},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7208999991416931},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6498000025749207},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5123000144958496},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.487199991941452},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.4830000102519989},{"id":"https://openalex.org/C181204326","wikidata":"https://www.wikidata.org/wiki/Q7239820","display_name":"Preference learning","level":3,"score":0.45559999346733093},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.4018999934196472},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.38609999418258667},{"id":"https://openalex.org/C2987595161","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Optimization algorithm","level":2,"score":0.3391999900341034},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3206000030040741},{"id":"https://openalex.org/C2779110102","wikidata":"https://www.wikidata.org/wiki/Q1323737","display_name":"Revealed preference","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.3052999973297119},{"id":"https://openalex.org/C2777868144","wikidata":"https://www.wikidata.org/wiki/Q7239817","display_name":"Preference elicitation","level":3,"score":0.2808000147342682},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2551000118255615}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.15602","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15602","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.15602","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15602","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Preference":[0],"optimization":[1,83],"is":[2,133],"widely":[3],"used":[4],"to":[5,67],"align":[6],"Large":[7],"Language":[8],"Models":[9],"(LLMs)":[10],"with":[11,102],"preference":[12,31,47,82],"feedback.":[13],"However,":[14],"most":[15],"existing":[16],"methods":[17],"train":[18],"on":[19,130],"a":[20,79,125],"single":[21],"positive-negative":[22],"pair":[23],"per":[24],"prompt,":[25,57],"discarding":[26],"additional":[27],"supervision":[28],"available":[29],"in":[30],"datasets":[32],"that":[33,85,115],"typically":[34],"contain":[35],"multiple":[36,52,117],"candidate":[37],"responses.":[38],"Motivated":[39],"by":[40],"this":[41,75],"limitation,":[42],"recent":[43],"work":[44],"explores":[45],"group-wise":[46,81],"optimization,":[48],"which":[49,98],"jointly":[50],"contrasts":[51],"responses":[53,118,132],"for":[54,135],"the":[55,68],"same":[56],"but":[58],"its":[59],"empirical":[60],"behavior":[61],"and":[62,109,139],"scalability":[63],"remain":[64],"underexplored":[65],"due":[66],"memory":[69,96],"overhead":[70],"of":[71],"group-coupled":[72],"objectives.":[73],"In":[74],"work,":[76],"we":[77,113],"introduce":[78],"memory-efficient":[80],"algorithm":[84],"preserves":[86],"gradients":[87],"while":[88],"decoupling":[89],"samples":[90],"during":[91],"backpropagation,":[92],"substantially":[93],"reducing":[94],"peak":[95],"usage,":[97],"enables":[99],"scalable":[100],"training":[101,140],"larger":[103],"group":[104],"sizes.":[105],"Across":[106],"both":[107,136],"offline":[108],"online":[110],"alignment":[111],"settings,":[112],"show":[114],"leveraging":[116],"consistently":[119],"outperforms":[120],"single-pair":[121],"training.":[122],"Furthermore,":[123],"incorporating":[124],"negative":[126],"log-likelihood":[127],"(NLL)":[128],"term":[129],"positive":[131],"critical":[134],"performance":[137],"gains":[138],"stability.":[141]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-21T00:00:00"}
