{"id":"https://openalex.org/W4411055102","doi":"https://doi.org/10.1109/tbiom.2025.3577085","title":"Handling the Details: A Two-Stage Diffusion Approach to Improving Hands in Human Image Generation","display_name":"Handling the Details: A Two-Stage Diffusion Approach to Improving Hands in Human Image Generation","publication_year":2025,"publication_date":"2025-06-05","ids":{"openalex":"https://openalex.org/W4411055102","doi":"https://doi.org/10.1109/tbiom.2025.3577085"},"language":"en","primary_location":{"id":"doi:10.1109/tbiom.2025.3577085","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tbiom.2025.3577085","pdf_url":null,"source":{"id":"https://openalex.org/S4210209367","display_name":"IEEE Transactions on Biometrics Behavior and Identity Science","issn_l":"2637-6407","issn":["2637-6407"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Biometrics, Behavior, and Identity Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090329344","display_name":"Anton Pelykh","orcid":null},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Anton Pelykh","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K"],"raw_orcid":"https://orcid.org/0009-0005-9075-5718","affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055148145","display_name":"\u00d6zge Mercano\u011flu Sincan","orcid":"https://orcid.org/0000-0001-9131-0634"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ozge Mercanoglu Sincan","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K"],"raw_orcid":"https://orcid.org/0000-0001-9131-0634","affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044490167","display_name":"Richard Bowden","orcid":"https://orcid.org/0000-0003-3285-8020"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Richard Bowden","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K"],"raw_orcid":"https://orcid.org/0000-0003-3285-8020","affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5090329344"],"corresponding_institution_ids":["https://openalex.org/I28290843"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13090296,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"7","issue":"4","first_page":"890","last_page":"901"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9747999906539917,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9513999819755554,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stage","display_name":"Stage (stratigraphy)","score":0.7148366570472717},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5401602387428284},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5327517986297607},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.4379744529724121},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41341519355773926},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4103514850139618},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.12385159730911255},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07117551565170288}],"concepts":[{"id":"https://openalex.org/C146357865","wikidata":"https://www.wikidata.org/wiki/Q1123245","display_name":"Stage (stratigraphy)","level":2,"score":0.7148366570472717},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5401602387428284},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5327517986297607},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.4379744529724121},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41341519355773926},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4103514850139618},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.12385159730911255},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07117551565170288},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tbiom.2025.3577085","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tbiom.2025.3577085","pdf_url":null,"source":{"id":"https://openalex.org/S4210209367","display_name":"IEEE Transactions on Biometrics Behavior and Identity Science","issn_l":"2637-6407","issn":["2637-6407"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Biometrics, Behavior, and Identity Science","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2097117768","https://openalex.org/W2963266880","https://openalex.org/W2964002510","https://openalex.org/W3107825842","https://openalex.org/W4312262410","https://openalex.org/W4312933868","https://openalex.org/W4313030876","https://openalex.org/W4385271281","https://openalex.org/W4386076520","https://openalex.org/W4390871780","https://openalex.org/W4390873054","https://openalex.org/W4390873520","https://openalex.org/W4390874575","https://openalex.org/W4393148714","https://openalex.org/W4400527808","https://openalex.org/W4402660144","https://openalex.org/W4402727583","https://openalex.org/W4403619370","https://openalex.org/W4403780713"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"There":[0],"has":[1],"been":[2],"significant":[3],"progress":[4],"in":[5,9,77,96,109,130,175],"human":[6,55,214],"image":[7,56,85,180,215],"generation":[8],"recent":[10],"years,":[11],"particularly":[12],"with":[13],"the":[14,24,33,66,74,93,97,110,115,125,136,144,149,160,166,185,195,198,210,219],"introduction":[15],"of":[16,100,127,143,159,168,197,212],"diffusion":[17],"models.":[18],"However,":[19],"it":[20],"is":[21,106],"challenging":[22],"for":[23],"existing":[25],"methods":[26],"to":[27,53,81,113,152],"produce":[28,82],"consistent":[29],"hand":[30,41,75,84,137,207],"anatomy,":[31],"and":[32,63,86,91,134,156,179,187],"generated":[34,199],"images":[35],"often":[36],"lack":[37],"precise":[38],"control":[39,205],"over":[40,172,206],"pose.":[42],"To":[43],"address":[44],"this":[45],"limitation,":[46],"we":[47,59],"introduce":[48,118],"a":[49,78,119,131,154],"novel":[50,120],"two-stage":[51],"approach":[52,191],"pose-conditioned":[54,213],"generation.":[57,101,216],"Firstly,":[58],"generate":[60],"detailed":[61],"hands":[62],"then":[64,107],"outpaint":[65,114],"body":[67],"around":[68],"those":[69],"hands.":[70],"We":[71,117,217],"propose":[72],"training":[73],"generator":[76],"multi-task":[79],"setting":[80],"both":[83,128,176],"their":[87],"corresponding":[88],"segmentation":[89],"masks,":[90],"employ":[92],"trained":[94],"model":[95,105],"first":[98],"stage":[99,112],"An":[102],"adapted":[103],"ControlNet":[104],"used":[108],"second":[111],"body.":[116],"blending":[121],"technique":[122],"that":[123],"combines":[124],"results":[126],"stages":[129],"coherent":[132],"way":[133],"preserves":[135],"details.":[138],"It":[139],"involves":[140],"sequential":[141],"expansion":[142],"outpainted":[145],"region":[146],"while":[147],"fusing":[148],"latent":[150],"representations,":[151],"ensure":[153],"seamless":[155],"cohesive":[157],"synthesis":[158],"final":[161],"image.":[162],"Experimental":[163],"evaluations":[164],"demonstrate":[165],"superiority":[167],"our":[169],"proposed":[170],"method":[171],"state-of-the-art":[173],"techniques":[174],"pose":[177],"accuracy":[178],"quality,":[181],"as":[182],"validated":[183],"on":[184],"HaGRID":[186],"YouTube-ASL":[188],"datasets.":[189],"Our":[190],"not":[192],"only":[193],"enhances":[194],"quality":[196],"hands,":[200],"but":[201],"also":[202],"offers":[203],"improved":[204],"pose,":[208],"advancing":[209],"capabilities":[211],"make":[218],"code":[220],"available.":[221]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
