{"id":"https://openalex.org/W4403791610","doi":"https://doi.org/10.1145/3664647.3681396","title":"CustomNet: Object Customization with Variable-Viewpoints in Text-to-Image Diffusion Models","display_name":"CustomNet: Object Customization with Variable-Viewpoints in Text-to-Image Diffusion Models","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791610","doi":"https://doi.org/10.1145/3664647.3681396"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681396","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681396","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3664647.3681396","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100581304","display_name":"Ziyang Yuan","orcid":"https://orcid.org/0009-0008-1302-2352"},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ziyang Yuan","raw_affiliation_strings":["SIGS, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"SIGS, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013983776","display_name":"Mingdeng Cao","orcid":"https://orcid.org/0000-0002-6577-4715"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Mingdeng Cao","raw_affiliation_strings":["The University of Tokyo, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101538534","display_name":"Xintao Wang","orcid":"https://orcid.org/0000-0001-6585-8604"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xintao Wang","raw_affiliation_strings":["ARC Lab, Tencent PCG, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"ARC Lab, Tencent PCG, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101500719","display_name":"Zhongang Qi","orcid":"https://orcid.org/0000-0001-8298-4063"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongang Qi","raw_affiliation_strings":["ARC Lab, Tencent PCG, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"ARC Lab, Tencent PCG, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101456902","display_name":"Chun Yuan","orcid":"https://orcid.org/0000-0002-3590-6676"},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chun Yuan","raw_affiliation_strings":["SIGS, Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"SIGS, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102004349","display_name":"Ying Shan","orcid":"https://orcid.org/0000-0001-7673-8325"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Shan","raw_affiliation_strings":["ARC Lab, Tencent PCG, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"ARC Lab, Tencent PCG, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100581304"],"corresponding_institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":2.0799,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.88697966,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"10976","last_page":"10984"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9659000039100647,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9659000039100647,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9623000025749207,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9502999782562256,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/viewpoints","display_name":"Viewpoints","score":0.7744036912918091},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7234125137329102},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.6628932356834412},{"id":"https://openalex.org/keywords/variable","display_name":"Variable (mathematics)","score":0.5485336780548096},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5045112371444702},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.44844701886177063},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4351697862148285},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4145389199256897},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39434558153152466},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.34669229388237},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.19910475611686707},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09473329782485962},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.057986289262771606}],"concepts":[{"id":"https://openalex.org/C2776035091","wikidata":"https://www.wikidata.org/wiki/Q7928819","display_name":"Viewpoints","level":2,"score":0.7744036912918091},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7234125137329102},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.6628932356834412},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.5485336780548096},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5045112371444702},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.44844701886177063},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4351697862148285},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4145389199256897},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39434558153152466},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34669229388237},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.19910475611686707},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09473329782485962},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.057986289262771606},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681396","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681396","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3664647.3681396","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681396","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2598591334","https://openalex.org/W2895191479","https://openalex.org/W2982695696","https://openalex.org/W3034684802","https://openalex.org/W3109585842","https://openalex.org/W3171358896","https://openalex.org/W3176368002","https://openalex.org/W3203583526","https://openalex.org/W4212774754","https://openalex.org/W4229682756","https://openalex.org/W4288083516","https://openalex.org/W4312453532","https://openalex.org/W4312698476","https://openalex.org/W4312933868","https://openalex.org/W4312966374","https://openalex.org/W4386072096","https://openalex.org/W4386075660","https://openalex.org/W4386076027","https://openalex.org/W4386083141","https://openalex.org/W4390873915","https://openalex.org/W4390874575"],"related_works":["https://openalex.org/W2385368906","https://openalex.org/W2902924992","https://openalex.org/W2626642044","https://openalex.org/W93537448","https://openalex.org/W2619807045","https://openalex.org/W2388758053","https://openalex.org/W2949734191","https://openalex.org/W2017333877","https://openalex.org/W2048332520","https://openalex.org/W4233821346"],"abstract_inverted_index":{"Incorporating":[0],"a":[1,60,104],"customized":[2],"object":[3,22,40,63,139],"into":[4,74],"image":[5],"generation":[6],"presents":[7],"an":[8,35],"attractive":[9],"feature":[10],"in":[11],"text-to-image":[12],"(T2I)":[13],"generation.":[14],"Some":[15],"methods":[16,164],"finetune":[17],"T2I":[18],"models":[19],"for":[20,43,138],"each":[21],"individually":[23],"at":[24,174],"test-time,":[25],"which":[26],"tend":[27],"to":[28,38,48,108],"be":[29],"overfitted":[30],"and":[31,86,113,125,153,169],"time-consuming.":[32],"Others":[33],"train":[34,98],"extra":[36],"encoder":[37],"extract":[39],"visual":[41],"information":[42],"customization":[44,64,76,140,163],"efficiently":[45],"but":[46],"struggle":[47],"preserve":[49],"the":[50,75,81,94,142],"object's":[51,95],"identity.":[52,96],"To":[53,97],"address":[54],"these":[55],"limitations,":[56],"we":[57,102,117],"present":[58],"CustomNet,":[59],"unified":[61],"encoder-based":[62],"framework":[65],"that":[66,121,158],"explicitly":[67],"incorporates":[68],"3D":[69],"novel":[70],"view":[71],"synthesis":[72],"capabilities":[73],"process.":[77],"This":[78],"integration":[79],"facilitates":[80],"adjustment":[82],"of":[83,144],"spatial":[84],"positions":[85],"viewpoints,":[87,151],"producing":[88],"diverse":[89],"outputs":[90],"while":[91],"effectively":[92],"preserving":[93],"our":[99,159],"model":[100],"effectively,":[101],"propose":[103],"dataset":[105],"construction":[106],"pipeline":[107],"better":[109],"handle":[110],"real-world":[111],"objects":[112],"complex":[114],"backgrounds.":[115,134],"Additionally,":[116],"introduce":[118],"delicate":[119],"designs":[120],"enable":[122],"location":[123],"control":[124,128,149],"flexible":[126],"background":[127],"through":[129],"textual":[130],"descriptions":[131],"or":[132],"user-defined":[133],"Our":[135],"method":[136,160],"allows":[137],"without":[141],"need":[143],"test-time":[145],"optimization,":[146],"providing":[147],"simultaneous":[148],"over":[150],"location,":[152],"text.":[154],"Experimental":[155],"results":[156],"show":[157],"outperforms":[161],"other":[162],"regarding":[165],"identity":[166],"preservation,":[167],"diversity,":[168],"harmony.":[170],"Codes":[171],"are":[172],"available":[173],"https://github.com/TencentARC/CustomNet.":[175]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
