{"id":"https://openalex.org/W4408399748","doi":"https://doi.org/10.1109/tcss.2025.3547421","title":"Self-Prompt Guided Image Outpainting Model for Captions Absence in Social Scenes","display_name":"Self-Prompt Guided Image Outpainting Model for Captions Absence in Social Scenes","publication_year":2025,"publication_date":"2025-03-13","ids":{"openalex":"https://openalex.org/W4408399748","doi":"https://doi.org/10.1109/tcss.2025.3547421"},"language":"en","primary_location":{"id":"doi:10.1109/tcss.2025.3547421","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcss.2025.3547421","pdf_url":null,"source":{"id":"https://openalex.org/S2490693980","display_name":"IEEE Transactions on Computational Social Systems","issn_l":"2329-924X","issn":["2329-924X","2373-7476"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computational Social Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045475334","display_name":"Zongyan Zhang","orcid":"https://orcid.org/0009-0000-5132-3326"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zongyan Zhang","raw_affiliation_strings":["Guangdong Provincial Key Laboratory of AI Large Model and Intelligent Cognition, The School of Computer Science and Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Guangdong Provincial Key Laboratory of AI Large Model and Intelligent Cognition, The School of Computer Science and Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100643265","display_name":"C. L. Philip Chen","orcid":"https://orcid.org/0000-0001-5451-7230"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"C. L. Philip Chen","raw_affiliation_strings":["Guangdong Provincial Key Laboratory of AI Large Model and Intelligent Cognition, The School of Computer Science and Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Guangdong Provincial Key Laboratory of AI Large Model and Intelligent Cognition, The School of Computer Science and Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007678458","display_name":"Haohan Weng","orcid":"https://orcid.org/0000-0003-4954-4546"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haohan Weng","raw_affiliation_strings":["Guangdong Provincial Key Laboratory of AI Large Model and Intelligent Cognition, The School of Computer Science and Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Guangdong Provincial Key Laboratory of AI Large Model and Intelligent Cognition, The School of Computer Science and Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100378800","display_name":"Tong Zhang","orcid":"https://orcid.org/0000-0002-7025-6365"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tong Zhang","raw_affiliation_strings":["Guangdong Provincial Key Laboratory of AI Large Model and Intelligent Cognition, The School of Computer Science and Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"Guangdong Provincial Key Laboratory of AI Large Model and Intelligent Cognition, The School of Computer Science and Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5045475334"],"corresponding_institution_ids":["https://openalex.org/I90610280"],"apc_list":null,"apc_paid":null,"fwci":1.3688,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.78349986,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"12","issue":"5","first_page":"3156","last_page":"3167"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9787999987602234,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9787999987602234,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9729999899864197,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9613000154495239,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5277933478355408},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5232792496681213},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46633148193359375},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4596521258354187},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.39951372146606445},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3457455039024353}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5277933478355408},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5232792496681213},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46633148193359375},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4596521258354187},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.39951372146606445},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3457455039024353}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcss.2025.3547421","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcss.2025.3547421","pdf_url":null,"source":{"id":"https://openalex.org/S2490693980","display_name":"IEEE Transactions on Computational Social Systems","issn_l":"2329-924X","issn":["2329-924X","2373-7476"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computational Social Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.5199999809265137}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2087366239","https://openalex.org/W2106477124","https://openalex.org/W2131585809","https://openalex.org/W2160526525","https://openalex.org/W2732026016","https://openalex.org/W2948627722","https://openalex.org/W3040932449","https://openalex.org/W3091119272","https://openalex.org/W3118896137","https://openalex.org/W3120472021","https://openalex.org/W3138794164","https://openalex.org/W3180355996","https://openalex.org/W4205220354","https://openalex.org/W4251264660","https://openalex.org/W4312315996","https://openalex.org/W4312933868","https://openalex.org/W4313021454","https://openalex.org/W4319989989","https://openalex.org/W4324119531","https://openalex.org/W4377716587","https://openalex.org/W4381736219","https://openalex.org/W4385413695","https://openalex.org/W4385627244","https://openalex.org/W4386072096","https://openalex.org/W4386075999","https://openalex.org/W4386076112","https://openalex.org/W4388919339","https://openalex.org/W4389633660","https://openalex.org/W4390871525","https://openalex.org/W4390873054","https://openalex.org/W4393148714","https://openalex.org/W4402727926"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"The":[0,147],"limitations":[1],"of":[2,11,20,37,46,58,84,110,143,165,181,196],"acquisition":[3],"equipment":[4],"often":[5],"result":[6],"in":[7,25,64,81],"scene":[8,40],"image":[9,22,30,48,65,101,166],"data":[10],"limited":[12],"size,":[13],"posing":[14],"a":[15,76,79,95,124,144,186],"challenge":[16],"for":[17,100],"comprehensive":[18],"analysis":[19],"social":[21,39,47],"datasets.":[23],"Advances":[24],"generative":[26,179],"models":[27],"have":[28],"introduced":[29],"outpainting":[31,102],"techniques":[32],"that":[33,103,127,214],"expand":[34],"the":[35,44,56,82,108,111,116,141,157,163,169,177,193,208,229],"size":[36],"acquired":[38],"images,":[41],"thereby":[42,114],"enhancing":[43],"value":[45],"data.":[49],"Stable":[50],"diffusion":[51,98,171,182,231],"(SD),":[52],"which":[53],"benefits":[54],"from":[55],"guidance":[57],"caption":[59],"prompts,":[60],"shows":[61],"excellent":[62],"performance":[63],"outpainting.":[66],"However,":[67],"its":[68],"heavy":[69],"reliance":[70],"on":[71,107,118,207,221],"manual":[72,119],"prompts":[73],"leads":[74],"to":[75,132,168,191,228],"significant":[77],"drawback:":[78],"decrease":[80],"quality":[83,223],"generated":[85,203],"images":[86,105],"without":[87,233],"prompts.":[88,120],"To":[89],"overcome":[90],"this":[91],"challenge,":[92],"we":[93,122,184],"propose":[94],"novel":[96],"self-prompt":[97],"model":[99,232],"extrapolates":[104],"based":[106],"semantics":[109],"source":[112],"image,":[113],"removing":[115],"dependence":[117],"Specifically,":[121],"design":[123],"prompt":[125,134,151,159,172],"autoencoder":[126],"uses":[128],"an":[129],"autoregressive":[130],"transformer":[131],"map":[133],"embeddings":[135,152],"into":[136],"their":[137],"semantic":[138,145,148],"space,":[139],"facilitating":[140],"construction":[142],"decoder.":[146],"decoder":[149],"and":[150,202,210,225],"are":[153],"then":[154],"cooptimized":[155],"within":[156],"proposed":[158],"embedding":[160],"network,":[161],"allowing":[162],"mapping":[164],"features":[167],"stable":[170,230],"embeddings.":[173],"Furthermore,":[174],"by":[175],"exploiting":[176],"inherent":[178],"capabilities":[180],"models,":[183],"introduce":[185],"seam":[187,197],"line":[188],"regeneration":[189],"mechanism":[190],"address":[192],"common":[194],"problem":[195],"lines":[198],"when":[199],"splicing":[200],"input":[201],"images.":[204],"Comparative":[205],"experiments":[206],"Places2":[209],"COCO":[211],"datasets":[212],"show":[213],"our":[215],"method":[216],"outperforms":[217],"current":[218],"state-of-the-art":[219],"approaches":[220],"visual":[222],"metrics":[224],"is":[226],"adaptable":[227],"additional":[234],"fine-tuning.":[235]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
