{"id":"https://openalex.org/W7131421698","doi":"https://doi.org/10.48550/arxiv.2602.19350","title":"PoseCraft: Tokenized 3D Body Landmark and Camera Conditioning for Photorealistic Human Image Synthesis","display_name":"PoseCraft: Tokenized 3D Body Landmark and Camera Conditioning for Photorealistic Human Image Synthesis","publication_year":2026,"publication_date":"2026-02-22","ids":{"openalex":"https://openalex.org/W7131421698","doi":"https://doi.org/10.48550/arxiv.2602.19350"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.19350","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19350","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.19350","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025338897","display_name":"Zhilin Guo","orcid":"https://orcid.org/0000-0002-7660-3102"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Zhilin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126802523","display_name":"Jing Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126818160","display_name":"Kyle Fogarty","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fogarty, Kyle","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018981890","display_name":"Jingyi Wan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wan, Jingyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069617567","display_name":"Boqiao Zhang","orcid":"https://orcid.org/0009-0005-4783-7152"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Boqiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126802128","display_name":"Tianhao Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Tianhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028709578","display_name":"Weihao Xia","orcid":"https://orcid.org/0000-0003-0087-3525"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Weihao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126816416","display_name":"Chenliang Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Chenliang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126735339","display_name":"Sakar Khattar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khattar, Sakar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084520143","display_name":"Fangcheng Zhong","orcid":"https://orcid.org/0000-0001-5964-5282"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Fangcheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019999252","display_name":"Cristina Nader Vasconcelos","orcid":"https://orcid.org/0000-0003-2112-4806"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vasconcelos, Cristina Nader","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126814863","display_name":"Cengiz Oztireli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oztireli, Cengiz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6425999999046326,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.6425999999046326,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.12939999997615814,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.09790000319480896,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6805999875068665},{"id":"https://openalex.org/keywords/landmark","display_name":"Landmark","score":0.6779000163078308},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.5491999983787537},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.42910000681877136},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.3497999906539917},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.3400000035762787},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.33250001072883606},{"id":"https://openalex.org/keywords/view-synthesis","display_name":"View synthesis","score":0.33059999346733093}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7634000182151794},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6996999979019165},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6805999875068665},{"id":"https://openalex.org/C2780297707","wikidata":"https://www.wikidata.org/wiki/Q4895393","display_name":"Landmark","level":2,"score":0.6779000163078308},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6772000193595886},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.5491999983787537},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.45660001039505005},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.42910000681877136},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.3497999906539917},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3400000035762787},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.33250001072883606},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.33059999346733093},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.29010000824928284},{"id":"https://openalex.org/C66629338","wikidata":"https://www.wikidata.org/wiki/Q189177","display_name":"3D computer graphics","level":3,"score":0.289000004529953},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.2824999988079071},{"id":"https://openalex.org/C30769735","wikidata":"https://www.wikidata.org/wiki/Q2165951","display_name":"Volume rendering","level":3,"score":0.2720000147819519},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C2777897806","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3D modeling","level":2,"score":0.2567000091075897},{"id":"https://openalex.org/C153715457","wikidata":"https://www.wikidata.org/wiki/Q254183","display_name":"Augmented reality","level":2,"score":0.25110000371932983},{"id":"https://openalex.org/C3019007443","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3d model","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.19350","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19350","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.19350","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19350","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Digitizing":[0],"humans":[1],"and":[2,10,18,38,71,78,98,101,109,113,145,160],"synthesizing":[3],"photorealistic":[4,103],"avatars":[5],"with":[6],"explicit":[7],"3D":[8,53,69,88],"pose":[9,97],"camera":[11,72],"controls":[12],"are":[13],"central":[14],"to":[15,151],"VR,":[16],"telepresence,":[17],"entertainment.":[19],"Existing":[20],"skinning-based":[21],"workflows":[22],"require":[23],"laborious":[24],"manual":[25],"rigging":[26],"or":[27,148],"template-based":[28],"fittings,":[29],"while":[30,156],"neural":[31],"volumetric":[32,130,153],"methods":[33],"rely":[34],"on":[35,59],"canonical":[36],"templates":[37],"re-optimization":[39],"for":[40],"each":[41],"unseen":[42],"pose.":[43],"We":[44],"present":[45],"PoseCraft,":[46],"a":[47,121],"diffusion":[48,82],"framework":[49],"built":[50],"around":[51],"tokenized":[52],"interface:":[54],"instead":[55],"of":[56],"relying":[57],"only":[58],"rasterized":[60],"geometry":[61],"as":[62,74],"2D":[63,92],"control":[64],"images,":[65],"we":[66,117],"encode":[67],"sparse":[68],"landmarks":[70],"extrinsics":[73],"discrete":[75],"conditioning":[76],"tokens":[77],"inject":[79],"them":[80],"into":[81],"via":[83],"cross-attention.":[84],"Our":[85,132],"approach":[86],"preserves":[87],"semantics":[89],"by":[90],"avoiding":[91],"re-projection":[93],"ambiguity":[94],"under":[95],"large":[96],"viewpoint":[99],"changes,":[100],"produces":[102],"imagery":[104],"that":[105,125,135],"faithfully":[106],"captures":[107],"identity":[108],"appearance.":[110],"To":[111],"train":[112],"evaluate":[114],"at":[115],"scale,":[116],"also":[118],"implement":[119],"GenHumanRF,":[120],"data":[122],"generation":[123],"workflow":[124],"renders":[126],"diverse":[127],"supervision":[128],"from":[129],"reconstructions.":[131],"experiments":[133],"show":[134],"PoseCraft":[136],"achieves":[137],"significant":[138],"perceptual":[139],"quality":[140],"improvement":[141],"over":[142],"diffusion-centric":[143],"methods,":[144],"attains":[146],"better":[147,157],"comparable":[149],"metrics":[150],"latest":[152],"rendering":[154],"SOTA":[155],"preserving":[158],"fabric":[159],"hair":[161],"details.":[162]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-26T00:00:00"}
