{"id":"https://openalex.org/W4392931598","doi":"https://doi.org/10.1109/icassp48485.2024.10446608","title":"Scene Sketch-to-Image Synthesis Based on Multi-Object Control","display_name":"Scene Sketch-to-Image Synthesis Based on Multi-Object Control","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392931598","doi":"https://doi.org/10.1109/icassp48485.2024.10446608"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446608","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446608","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010655699","display_name":"Zhenwei Cheng","orcid":"https://orcid.org/0009-0008-6838-8634"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhenwei Cheng","raw_affiliation_strings":["Shandong University,School of Software,Jinan,China","School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"Shandong University,School of Software,Jinan,China","institution_ids":["https://openalex.org/I154099455"]},{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101963649","display_name":"Lei Wu","orcid":"https://orcid.org/0009-0007-7363-0002"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Wu","raw_affiliation_strings":["Shandong University,School of Software,Jinan,China","School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"Shandong University,School of Software,Jinan,China","institution_ids":["https://openalex.org/I154099455"]},{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037445341","display_name":"Changshuo Wang","orcid":"https://orcid.org/0000-0002-4056-4922"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changshuo Wang","raw_affiliation_strings":["Shandong University,School of Software,Jinan,China","School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"Shandong University,School of Software,Jinan,China","institution_ids":["https://openalex.org/I154099455"]},{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101536417","display_name":"Xiangxu Meng","orcid":"https://orcid.org/0000-0001-7290-5659"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangxu Meng","raw_affiliation_strings":["Shandong University,School of Software,Jinan,China","School of Software, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"Shandong University,School of Software,Jinan,China","institution_ids":["https://openalex.org/I154099455"]},{"raw_affiliation_string":"School of Software, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5010655699"],"corresponding_institution_ids":["https://openalex.org/I154099455"],"apc_list":null,"apc_paid":null,"fwci":0.7949,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.70356622,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3775","last_page":"3779"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.860636830329895},{"id":"https://openalex.org/keywords/sketch","display_name":"Sketch","score":0.826391339302063},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7413201332092285},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6752079129219055},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6133774518966675},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5135788917541504},{"id":"https://openalex.org/keywords/view-synthesis","display_name":"View synthesis","score":0.5076640844345093},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4867112934589386},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4749579429626465},{"id":"https://openalex.org/keywords/texture-synthesis","display_name":"Texture synthesis","score":0.41489747166633606},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4147833585739136},{"id":"https://openalex.org/keywords/image-synthesis","display_name":"Image synthesis","score":0.41255927085876465},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.3873808979988098},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.174125075340271},{"id":"https://openalex.org/keywords/image-texture","display_name":"Image texture","score":0.16147953271865845}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.860636830329895},{"id":"https://openalex.org/C2779231336","wikidata":"https://www.wikidata.org/wiki/Q7534724","display_name":"Sketch","level":2,"score":0.826391339302063},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7413201332092285},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6752079129219055},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6133774518966675},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5135788917541504},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.5076640844345093},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4867112934589386},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4749579429626465},{"id":"https://openalex.org/C50494287","wikidata":"https://www.wikidata.org/wiki/Q658467","display_name":"Texture synthesis","level":5,"score":0.41489747166633606},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4147833585739136},{"id":"https://openalex.org/C2989087649","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Image synthesis","level":3,"score":0.41255927085876465},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.3873808979988098},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.174125075340271},{"id":"https://openalex.org/C63099799","wikidata":"https://www.wikidata.org/wiki/Q17147001","display_name":"Image texture","level":4,"score":0.16147953271865845},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446608","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446608","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2133665775","https://openalex.org/W2612690371","https://openalex.org/W2884466206","https://openalex.org/W2885121511","https://openalex.org/W2941483950","https://openalex.org/W2963073614","https://openalex.org/W2963561004","https://openalex.org/W2978052557","https://openalex.org/W2981824749","https://openalex.org/W3035124078","https://openalex.org/W3036167779","https://openalex.org/W3115218903","https://openalex.org/W3175344711","https://openalex.org/W4213207493","https://openalex.org/W4226077360","https://openalex.org/W4281718656","https://openalex.org/W4312933868","https://openalex.org/W4319301041","https://openalex.org/W4320013936","https://openalex.org/W4320901388","https://openalex.org/W4321276804","https://openalex.org/W4372259905","https://openalex.org/W4377164421","https://openalex.org/W4385537492","https://openalex.org/W4386172409","https://openalex.org/W4390873054","https://openalex.org/W4393148714","https://openalex.org/W6765779288","https://openalex.org/W6779823529","https://openalex.org/W6853140221"],"related_works":["https://openalex.org/W2539134624","https://openalex.org/W2801494623","https://openalex.org/W2397696627","https://openalex.org/W2083583319","https://openalex.org/W2611780867","https://openalex.org/W2372792416","https://openalex.org/W2365582905","https://openalex.org/W2566018851","https://openalex.org/W2927209830","https://openalex.org/W1549385931"],"abstract_inverted_index":{"Scene":[0],"sketch-to-image":[1,48],"synthesis":[2,35],"is":[3,99],"a":[4,46,70,102],"challenging":[5],"task,":[6],"especially":[7],"when":[8,25],"the":[9,84],"sketches":[10,64,128],"contain":[11],"multiple":[12],"objects":[13,24,88,92],"of":[14,23,86],"different":[15,21],"classes.":[16,40],"Existing":[17],"methods":[18],"interfere":[19],"between":[20],"classes":[22,85],"generating":[26],"images":[27,36,61,125],"from":[28,62,126],"scene":[29,47,63,127],"sketches,":[30],"making":[31],"it":[32],"difficult":[33],"to":[34,121],"with":[37],"accurate":[38],"object":[39],"In":[41],"this":[42],"paper,":[43],"we":[44],"propose":[45,69],"generation":[49],"method":[50,98],"based":[51,73,100],"on":[52,74,101,111],"multi-object":[53],"control,":[54],"which":[55,80],"can":[56,81],"generate":[57,122],"high-quality":[58],"and":[59,65,77,89,93,113,129],"class-accurate":[60],"text":[66,130],"prompts.":[67,131],"We":[68],"sampling":[71],"strategy":[72],"segmentation":[75],"mask":[76],"independent":[78],"denoising,":[79],"accurately":[82],"control":[83],"foreground":[87,91],"make":[90],"background":[94],"more":[95],"harmonized.":[96],"Our":[97],"pre-trained":[103],"diffusion":[104],"model":[105],"without":[106],"additional":[107],"training":[108],"overhead.":[109],"Experiments":[110],"SketchyCOCO":[112],"SketchyScene":[114],"datasets":[115],"demonstrate":[116],"that":[117],"our":[118],"method\u2019s":[119],"capacity":[120],"realistic":[123],"complex":[124]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
