{"id":"https://openalex.org/W4399116027","doi":"https://doi.org/10.48550/arxiv.2405.16803","title":"TIE: Revolutionizing Text-based Image Editing for Complex-Prompt Following and High-Fidelity Editing","display_name":"TIE: Revolutionizing Text-based Image Editing for Complex-Prompt Following and High-Fidelity Editing","publication_year":2024,"publication_date":"2024-05-27","ids":{"openalex":"https://openalex.org/W4399116027","doi":"https://doi.org/10.48550/arxiv.2405.16803"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2405.16803","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.16803","pdf_url":"https://arxiv.org/pdf/2405.16803","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2405.16803","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100390697","display_name":"Xinyu Zhang","orcid":"https://orcid.org/0000-0002-4986-4649"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Xinyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102678209","display_name":"Mengxue Kang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kang, Mengxue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101518157","display_name":"Fei Wei","orcid":"https://orcid.org/0000-0003-1219-9590"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Fei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107415124","display_name":"Shuang Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Shuang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001504472","display_name":"Yuhe Liu","orcid":"https://orcid.org/0000-0002-7470-1905"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yuhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5059864471","display_name":"Lin Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Lin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100390697"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10783","display_name":"Additive Manufacturing and 3D Printing Technologies","score":0.6158999800682068,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10783","display_name":"Additive Manufacturing and 3D Printing Technologies","score":0.6158999800682068,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12784","display_name":"Modular Robots and Swarm Intelligence","score":0.5799999833106995,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-editing","display_name":"Image editing","score":0.8359614610671997},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6544473171234131},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.6040810346603394},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.5608808398246765},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.5533120036125183},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.4328180253505707},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.35318154096603394},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.32078492641448975},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2969082295894623},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.11247721314430237},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1031680703163147}],"concepts":[{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.8359614610671997},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6544473171234131},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.6040810346603394},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.5608808398246765},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.5533120036125183},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.4328180253505707},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.35318154096603394},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.32078492641448975},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2969082295894623},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.11247721314430237},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1031680703163147},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2405.16803","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.16803","pdf_url":"https://arxiv.org/pdf/2405.16803","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2405.16803","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2405.16803","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2405.16803","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.16803","pdf_url":"https://arxiv.org/pdf/2405.16803","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399116027.pdf","grobid_xml":"https://content.openalex.org/works/W4399116027.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4313443006","https://openalex.org/W2945374968","https://openalex.org/W4385452045","https://openalex.org/W4293777179","https://openalex.org/W2164070813","https://openalex.org/W2135608140","https://openalex.org/W4319453795","https://openalex.org/W2895525995","https://openalex.org/W4224231624","https://openalex.org/W2332512904"],"abstract_inverted_index":{"As":[0],"the":[1,47,85,93,100,103,108,114],"field":[2],"of":[3,55,96,102,113,128],"image":[4,29,42,118,140],"generation":[5],"rapidly":[6],"advances,":[7],"traditional":[8],"diffusion":[9,60,109],"models":[10,18,61,110,121],"and":[11,27,32,52,79,99,117,157,165,170],"those":[12],"integrated":[13],"with":[14,111,124],"multimodal":[15,56,90],"large":[16],"language":[17],"(LLMs)":[19],"still":[20],"encounter":[21],"limitations":[22],"in":[23,62,139,167],"interpreting":[24],"complex":[25,155],"prompts":[26,156],"preserving":[28],"consistency":[30,166],"pre":[31],"post-editing.":[33],"To":[34],"tackle":[35],"these":[36],"challenges,":[37],"we":[38,83],"present":[39],"an":[40,150],"innovative":[41],"editing":[43],"framework":[44],"that":[45],"employs":[46],"robust":[48],"Chain-of-Thought":[49],"(CoT)":[50],"reasoning":[51],"localizing":[53],"capabilities":[54],"LLMs":[57,98],"to":[58,153],"aid":[59],"generating":[63],"more":[64],"refined":[65],"images.":[66],"We":[67],"first":[68],"meticulously":[69],"design":[70],"a":[71,88,125],"CoT":[72,94],"process":[73,95],"comprising":[74],"instruction":[75],"decomposition,":[76],"region":[77],"localization,":[78],"detailed":[80],"description.":[81],"Subsequently,":[82],"fine-tune":[84],"LISA":[86],"model,":[87],"lightweight":[89],"LLM,":[91],"using":[92],"Multimodal":[97],"mask":[101],"edited":[104],"image.":[105],"By":[106],"providing":[107],"knowledge":[112],"generated":[115],"prompt":[116],"mask,":[119],"our":[120,133,147],"generate":[122,158],"images":[123,168],"superior":[126,137],"understanding":[127],"instructions.":[129],"Through":[130],"extensive":[131],"experiments,":[132],"model":[134,148],"has":[135],"demonstrated":[136],"performance":[138],"generation,":[141],"surpassing":[142],"existing":[143],"state-of-the-art":[144],"models.":[145],"Notably,":[146],"exhibits":[149],"enhanced":[151],"ability":[152],"understand":[154],"corresponding":[159],"images,":[160],"while":[161],"maintaining":[162],"high":[163],"fidelity":[164],"before":[169],"after":[171],"generation.":[172]},"counts_by_year":[],"updated_date":"2026-03-14T08:43:22.919905","created_date":"2024-05-29T00:00:00"}
