{"id":"https://openalex.org/W4405270932","doi":"https://doi.org/10.1109/cvmi61877.2024.10782008","title":"Octopus: A Latent Diffusion Model for Enhanced Text-Driven Manipulation in Image Synthesis","display_name":"Octopus: A Latent Diffusion Model for Enhanced Text-Driven Manipulation in Image Synthesis","publication_year":2024,"publication_date":"2024-10-19","ids":{"openalex":"https://openalex.org/W4405270932","doi":"https://doi.org/10.1109/cvmi61877.2024.10782008"},"language":"en","primary_location":{"id":"doi:10.1109/cvmi61877.2024.10782008","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvmi61877.2024.10782008","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Computer Vision and Machine Intelligence (CVMI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115395127","display_name":"M Nithin Skantha","orcid":null},"institutions":[{"id":"https://openalex.org/I81556334","display_name":"Amrita Vishwa Vidyapeetham","ror":"https://ror.org/03am10p12","country_code":"IN","type":"education","lineage":["https://openalex.org/I81556334"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"M Nithin Skantha","raw_affiliation_strings":["Coimbatore Amrita Vishwa Vidyapeetham,Amrita School of Computing,Department of Computer Science and Engineering,India"],"affiliations":[{"raw_affiliation_string":"Coimbatore Amrita Vishwa Vidyapeetham,Amrita School of Computing,Department of Computer Science and Engineering,India","institution_ids":["https://openalex.org/I81556334"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115395128","display_name":"B Meghadharsan","orcid":null},"institutions":[{"id":"https://openalex.org/I81556334","display_name":"Amrita Vishwa Vidyapeetham","ror":"https://ror.org/03am10p12","country_code":"IN","type":"education","lineage":["https://openalex.org/I81556334"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"B Meghadharsan","raw_affiliation_strings":["Coimbatore Amrita Vishwa Vidyapeetham,Amrita School of Computing,Department of Computer Science and Engineering,India"],"affiliations":[{"raw_affiliation_string":"Coimbatore Amrita Vishwa Vidyapeetham,Amrita School of Computing,Department of Computer Science and Engineering,India","institution_ids":["https://openalex.org/I81556334"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048612697","display_name":"C Sri Vignesh","orcid":null},"institutions":[{"id":"https://openalex.org/I81556334","display_name":"Amrita Vishwa Vidyapeetham","ror":"https://ror.org/03am10p12","country_code":"IN","type":"education","lineage":["https://openalex.org/I81556334"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"C Sri Vignesh","raw_affiliation_strings":["Coimbatore Amrita Vishwa Vidyapeetham,Amrita School of Computing,Department of Computer Science and Engineering,India"],"affiliations":[{"raw_affiliation_string":"Coimbatore Amrita Vishwa Vidyapeetham,Amrita School of Computing,Department of Computer Science and Engineering,India","institution_ids":["https://openalex.org/I81556334"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115395129","display_name":"J Thiruselvan","orcid":null},"institutions":[{"id":"https://openalex.org/I81556334","display_name":"Amrita Vishwa Vidyapeetham","ror":"https://ror.org/03am10p12","country_code":"IN","type":"education","lineage":["https://openalex.org/I81556334"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"J Thiruselvan","raw_affiliation_strings":["Coimbatore Amrita Vishwa Vidyapeetham,Amrita School of Computing,Department of Computer Science and Engineering,India"],"affiliations":[{"raw_affiliation_string":"Coimbatore Amrita Vishwa Vidyapeetham,Amrita School of Computing,Department of Computer Science and Engineering,India","institution_ids":["https://openalex.org/I81556334"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011070228","display_name":"Arti Anuragi","orcid":"https://orcid.org/0000-0002-6999-2947"},"institutions":[{"id":"https://openalex.org/I81556334","display_name":"Amrita Vishwa Vidyapeetham","ror":"https://ror.org/03am10p12","country_code":"IN","type":"education","lineage":["https://openalex.org/I81556334"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Arti Anuragi","raw_affiliation_strings":["Coimbatore Amrita Vishwa Vidyapeetham,Amrita School of Computing,Department of Computer Science and Engineering,India"],"affiliations":[{"raw_affiliation_string":"Coimbatore Amrita Vishwa Vidyapeetham,Amrita School of Computing,Department of Computer Science and Engineering,India","institution_ids":["https://openalex.org/I81556334"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5115395127"],"corresponding_institution_ids":["https://openalex.org/I81556334"],"apc_list":null,"apc_paid":null,"fwci":1.286,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.89292023,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9430999755859375,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9430999755859375,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9352999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9261000156402588,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/octopus","display_name":"octopus (software)","score":0.8211844563484192},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6266814470291138},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.5078499913215637},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5031210780143738},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47803816199302673},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4518822431564331},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.35860398411750793},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09609296917915344}],"concepts":[{"id":"https://openalex.org/C2779205690","wikidata":"https://www.wikidata.org/wiki/Q7077081","display_name":"octopus (software)","level":2,"score":0.8211844563484192},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6266814470291138},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.5078499913215637},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5031210780143738},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47803816199302673},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4518822431564331},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.35860398411750793},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09609296917915344},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvmi61877.2024.10782008","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvmi61877.2024.10782008","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Computer Vision and Machine Intelligence (CVMI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2018377917","https://openalex.org/W2132914434","https://openalex.org/W2760850206","https://openalex.org/W2962770929","https://openalex.org/W2964328732","https://openalex.org/W4206820829","https://openalex.org/W4312282373","https://openalex.org/W4312574495","https://openalex.org/W4312872987","https://openalex.org/W4312933868","https://openalex.org/W4318570677","https://openalex.org/W4386071707","https://openalex.org/W4386072096","https://openalex.org/W4386076215","https://openalex.org/W4390874089","https://openalex.org/W4390874575","https://openalex.org/W4392822465","https://openalex.org/W6796581206","https://openalex.org/W6801441102","https://openalex.org/W6840155194","https://openalex.org/W6841366371","https://openalex.org/W6847076894","https://openalex.org/W6850625674","https://openalex.org/W6854866820"],"related_works":["https://openalex.org/W4401932566","https://openalex.org/W3092465102","https://openalex.org/W2002595366","https://openalex.org/W2074891905","https://openalex.org/W4393356534","https://openalex.org/W2525321787","https://openalex.org/W3180040367","https://openalex.org/W2961534621","https://openalex.org/W1965046275","https://openalex.org/W585380317"],"abstract_inverted_index":{"By":[0],"combining":[1],"user":[2],"interactivity":[3],"and":[4,37,49,96,179,211],"precise":[5,97,139,216],"control":[6,98,140,147,167],"over":[7,99,141,148],"image":[8,14,80,130,142,149,168,200,210,218],"attributes,":[9,150],"the":[10,47,63,68,72,91,100,104,108,215,224],"applications":[11],"of":[12,42,51,67,87,93,113,199,226,232],"text-based":[13],"synthesis":[15],"can":[16,30],"be":[17],"vastly":[18],"expanded.":[19],"Thanks":[20],"to":[21,61,126,166,195,203],"recent":[22,160],"advancements":[23,78],"in":[24,79,103,146,158],"developing":[25],"state-of-the-art":[26],"text-to-image":[27],"models,":[28],"we":[29],"now":[31],"generate":[32],"images":[33,52,110],"with":[34,188,229],"high":[35],"fidelity":[36,48],"diversity.":[38],"While":[39],"a":[40,123,154,159,189,197,230],"plethora":[41],"research":[43],"focuses":[44],"on":[45],"improving":[46],"diversity":[50],"generated":[53,69],"using":[54],"text":[55,83,165],"prompts,":[56,84],"less":[57],"focus":[58],"is":[59,185],"given":[60,207],"controlling":[62],"attributes":[64,169],"or":[65],"characteristics":[66],"images.":[70],"On":[71],"other":[73],"hand,":[74],"there":[75],"have":[76],"been":[77],"editing":[81,131,201],"through":[82],"but":[85],"most":[86],"these":[88,118],"models":[89],"lack":[90],"understanding":[92],"spatial":[94],"knowledge":[95],"objects":[101],"present":[102],"image,":[105],"which":[106,137,162],"makes":[107],"edited":[109,217],"lose":[111],"much":[112],"their":[114],"characteristics.":[115],"To":[116,144],"address":[117],"challenges,":[119],"this":[120,151,183,227],"study":[121,152,222],"proposes":[122],"three-step":[124],"methodology":[125,156],"introduce":[127],"our":[128],"text-driven":[129],"conditional":[132],"diffusion":[133],"model":[134,184],"called":[135],"\u201cOctopus,\u201d":[136],"has":[138],"attributes.":[143],"bring":[145],"uses":[153,163],"similar":[155],"proposed":[157],"study,":[161],"rich":[164],"while":[170],"exploring":[171],"different":[172],"formats,":[173],"including":[174],"bold,":[175],"footnotes,":[176],"font":[177,180],"size,":[178],"color.":[181],"Then":[182],"used":[186],"along":[187],"fine-tuned":[190],"Large":[191],"Language":[192],"Model":[193],"(Gemma),":[194],"create":[196],"dataset":[198],"examples":[202],"train":[204],"Octopus,":[205],"which,":[206],"an":[208],"input":[209],"edit":[212],"instructions,":[213],"generates":[214],"during":[219],"inference.":[220],"This":[221],"demonstrates":[223],"potential":[225],"approach":[228],"variety":[231],"compelling":[233],"results.":[234]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
