{"id":"https://openalex.org/W7133320976","doi":"https://doi.org/10.48550/arxiv.2603.01594","title":"Preference Score Distillation: Leveraging 2D Rewards to Align Text-to-3D Generation with Human Preference","display_name":"Preference Score Distillation: Leveraging 2D Rewards to Align Text-to-3D Generation with Human Preference","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7133320976","doi":"https://doi.org/10.48550/arxiv.2603.01594"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.01594","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01594","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.01594","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127910293","display_name":"Jiaqi Leng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Leng, Jiaqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025369047","display_name":"Shuyuan Tu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tu, Shuyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128030618","display_name":"Haidong Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Haidong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127929696","display_name":"Sicheng Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Sicheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127948446","display_name":"Daoguo Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Daoguo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127979807","display_name":"Zuxuan Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Zuxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127916632","display_name":"Yu-Gang Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Yu-Gang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5127910293"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4921000003814697,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4921000003814697,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.18569999933242798,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.03680000081658363,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.7900000214576721},{"id":"https://openalex.org/keywords/preference-learning","display_name":"Preference learning","score":0.5482000112533569},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5331000089645386},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.44029998779296875},{"id":"https://openalex.org/keywords/preference-elicitation","display_name":"Preference elicitation","score":0.2962000072002411}],"concepts":[{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.7900000214576721},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6032000184059143},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5972999930381775},{"id":"https://openalex.org/C181204326","wikidata":"https://www.wikidata.org/wiki/Q7239820","display_name":"Preference learning","level":3,"score":0.5482000112533569},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5331000089645386},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5146999955177307},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.44029998779296875},{"id":"https://openalex.org/C2777868144","wikidata":"https://www.wikidata.org/wiki/Q7239817","display_name":"Preference elicitation","level":3,"score":0.2962000072002411},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2948000133037567},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.2842999994754791},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.27399998903274536},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2549000084400177}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.01594","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01594","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.01594","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01594","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Human":[0],"preference":[1,102,131,161],"alignment":[2,103,162],"presents":[3],"a":[4,105],"critical":[5],"yet":[6],"underexplored":[7],"challenge":[8],"for":[9,46],"diffusion":[10,97,120],"models":[11,45,121],"in":[12,24,90,95,176],"text-to-3D":[13,48],"generation.":[14],"Existing":[15],"solutions":[16],"typically":[17],"require":[18],"task-specific":[19],"fine-tuning,":[20],"posing":[21],"significant":[22],"hurdles":[23],"data-scarce":[25],"3D":[26,51],"domains.":[27],"To":[28,151],"address":[29],"this,":[30],"we":[31,99,124,154],"propose":[32],"Preference":[33],"Score":[34],"Distillation":[35],"(PSD),":[36],"an":[37,126],"optimization-based":[38],"framework":[39],"that":[40,86,117],"leverages":[41],"pretrained":[42,119],"2D":[43,78],"reward":[44,72,79,113],"human-aligned":[47],"synthesis":[49],"without":[50],"training":[52],"data.":[53],"Our":[54],"key":[55],"insight":[56],"stems":[57],"from":[58],"the":[59,66,82,91,156,172],"incompatibility":[60],"of":[61,68,77,144,174],"pixel-level":[62],"gradients:":[63],"due":[64],"to":[65,129,158],"absence":[67],"noisy":[69],"samples":[70],"during":[71,140],"model":[73],"training,":[74],"direct":[75],"application":[76],"gradients":[80],"disturbs":[81],"denoising":[83],"process.":[84],"Noticing":[85],"similar":[87],"issue":[88],"occurs":[89],"naive":[92],"classifier":[93],"guidance":[94,107],"conditioned":[96],"models,":[98],"fundamentally":[100],"rethink":[101],"as":[104],"classifier-free":[106],"(CFG)-style":[108],"mechanism":[109],"through":[110],"our":[111,152],"implicit":[112],"model.":[114],"Furthermore,":[115],"recognizing":[116],"frozen":[118],"constrain":[122],"performance,":[123],"introduce":[125],"adaptive":[127],"strategy":[128],"co-optimize":[130],"scores":[132],"and":[133,184],"negative":[134,145],"text":[135,146],"embeddings.":[136],"By":[137],"incorporating":[138],"CFG":[139,164],"optimization,":[141],"online":[142],"refinement":[143],"embeddings":[147],"dynamically":[148],"enhances":[149],"alignment.":[150],"knowledge,":[153],"are":[155],"first":[157],"bridge":[159],"human":[160],"with":[163,181],"theory":[165],"under":[166],"score":[167],"distillation":[168],"framework.":[169],"Experiments":[170],"demonstrate":[171],"superiority":[173],"PSD":[175],"aesthetic":[177],"metrics,":[178],"seamless":[179],"integration":[180],"diverse":[182],"pipelines,":[183],"strong":[185],"extensibility.":[186]},"counts_by_year":[],"updated_date":"2026-03-04T07:09:34.246503","created_date":"2026-03-04T00:00:00"}
