{"id":"https://openalex.org/W3048835936","doi":"https://doi.org/10.1145/3474085.3475343","title":"Text as Neural Operator:Image Manipulation by Text Instruction","display_name":"Text as Neural Operator:Image Manipulation by Text Instruction","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3048835936","doi":"https://doi.org/10.1145/3474085.3475343","mag":"3048835936"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475343","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3474085.3475343","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3474085.3475343","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3474085.3475343","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100669565","display_name":"Tianhao Zhang","orcid":"https://orcid.org/0000-0002-5939-3932"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tianhao Zhang","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090586839","display_name":"Hung-Yu Tseng","orcid":null},"institutions":[{"id":"https://openalex.org/I156087764","display_name":"University of California, Merced","ror":"https://ror.org/00d9ah105","country_code":"US","type":"education","lineage":["https://openalex.org/I156087764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hung-Yu Tseng","raw_affiliation_strings":["University of California, Merced, Merced, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Merced, Merced, CA, USA","institution_ids":["https://openalex.org/I156087764"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090730336","display_name":"Lu Jiang","orcid":"https://orcid.org/0000-0003-0286-8439"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lu Jiang","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101699552","display_name":"Weilong Yang","orcid":"https://orcid.org/0000-0001-7888-2304"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weilong Yang","raw_affiliation_strings":["Waymo, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Waymo, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108652283","display_name":"Honglak Lee","orcid":"https://orcid.org/0000-0002-1279-0068"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Honglak Lee","raw_affiliation_strings":["University of Michigan, Ann Arbor, MI, USA"],"affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070348998","display_name":"Irfan Essa","orcid":"https://orcid.org/0000-0002-6236-2969"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Irfan Essa","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100669565"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":2.519,"has_fulltext":true,"cited_by_count":28,"citation_normalized_percentile":{"value":0.90939151,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1893","last_page":"1902"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8708306550979614},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.6293509006500244},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.6262951493263245},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.6221426129341125},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5703549385070801},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5659147500991821},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.565261721611023},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5417720675468445},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.527203381061554},{"id":"https://openalex.org/keywords/image-editing","display_name":"Image editing","score":0.4966626763343811},{"id":"https://openalex.org/keywords/automatic-image-annotation","display_name":"Automatic image annotation","score":0.461056113243103},{"id":"https://openalex.org/keywords/relevance-feedback","display_name":"Relevance feedback","score":0.4466691315174103},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4255399703979492},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.41394972801208496},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.35749173164367676}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8708306550979614},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.6293509006500244},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.6262951493263245},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.6221426129341125},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5703549385070801},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5659147500991821},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.565261721611023},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5417720675468445},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.527203381061554},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.4966626763343811},{"id":"https://openalex.org/C199579030","wikidata":"https://www.wikidata.org/wiki/Q2851778","display_name":"Automatic image annotation","level":4,"score":0.461056113243103},{"id":"https://openalex.org/C2779532271","wikidata":"https://www.wikidata.org/wiki/Q445558","display_name":"Relevance feedback","level":4,"score":0.4466691315174103},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4255399703979492},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.41394972801208496},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.35749173164367676},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3474085.3475343","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3474085.3475343","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3474085.3475343","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2008.04556","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2008.04556","pdf_url":"https://arxiv.org/pdf/2008.04556","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3474085.3475343","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3474085.3475343","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3474085.3475343","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.699999988079071,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3048835936.pdf","grobid_xml":"https://content.openalex.org/works/W3048835936.grobid-xml"},"referenced_works_count":138,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1996418862","https://openalex.org/W2010223195","https://openalex.org/W2033365921","https://openalex.org/W2099471712","https://openalex.org/W2141311545","https://openalex.org/W2175714310","https://openalex.org/W2194775991","https://openalex.org/W2326925005","https://openalex.org/W2340897893","https://openalex.org/W2412393473","https://openalex.org/W2525579820","https://openalex.org/W2547875792","https://openalex.org/W2552465644","https://openalex.org/W2561715562","https://openalex.org/W2572730214","https://openalex.org/W2593414223","https://openalex.org/W2603777577","https://openalex.org/W2604130399","https://openalex.org/W2605287558","https://openalex.org/W2611932403","https://openalex.org/W2612063021","https://openalex.org/W2624614404","https://openalex.org/W2626778328","https://openalex.org/W2735001949","https://openalex.org/W2760103357","https://openalex.org/W2767175863","https://openalex.org/W2768959015","https://openalex.org/W2774625825","https://openalex.org/W2788095258","https://openalex.org/W2796341166","https://openalex.org/W2797650215","https://openalex.org/W2798386241","https://openalex.org/W2798503981","https://openalex.org/W2798600195","https://openalex.org/W2803170315","https://openalex.org/W2807033398","https://openalex.org/W2888039002","https://openalex.org/W2891417743","https://openalex.org/W2893749619","https://openalex.org/W2894754680","https://openalex.org/W2896457183","https://openalex.org/W2899771611","https://openalex.org/W2902372913","https://openalex.org/W2905544595","https://openalex.org/W2914738578","https://openalex.org/W2916671372","https://openalex.org/W2920879895","https://openalex.org/W2931831966","https://openalex.org/W2943259397","https://openalex.org/W2949650786","https://openalex.org/W2949808870","https://openalex.org/W2949981374","https://openalex.org/W2950404765","https://openalex.org/W2951697117","https://openalex.org/W2952056941","https://openalex.org/W2952716587","https://openalex.org/W2953016680","https://openalex.org/W2953064936","https://openalex.org/W2953139137","https://openalex.org/W2960202457","https://openalex.org/W2962716332","https://openalex.org/W2962793481","https://openalex.org/W2962819541","https://openalex.org/W2962927829","https://openalex.org/W2962974533","https://openalex.org/W2963047368","https://openalex.org/W2963073614","https://openalex.org/W2963131783","https://openalex.org/W2963163163","https://openalex.org/W2963184176","https://openalex.org/W2963341956","https://openalex.org/W2963393838","https://openalex.org/W2963402808","https://openalex.org/W2963683323","https://openalex.org/W2963800363","https://openalex.org/W2963890275","https://openalex.org/W2963907629","https://openalex.org/W2963917086","https://openalex.org/W2963966654","https://openalex.org/W2963981733","https://openalex.org/W2964082983","https://openalex.org/W2964121744","https://openalex.org/W2964138017","https://openalex.org/W2964338366","https://openalex.org/W2965289598","https://openalex.org/W2966792645","https://openalex.org/W2967078376","https://openalex.org/W2970127127","https://openalex.org/W2970246438","https://openalex.org/W2971191214","https://openalex.org/W2971356303","https://openalex.org/W2972328244","https://openalex.org/W2975181451","https://openalex.org/W2978052557","https://openalex.org/W2981464166","https://openalex.org/W2981824749","https://openalex.org/W2982014123","https://openalex.org/W2982260486","https://openalex.org/W2984854289","https://openalex.org/W2987870723","https://openalex.org/W2988086053","https://openalex.org/W2990428574","https://openalex.org/W2993158499","https://openalex.org/W2997355574","https://openalex.org/W3008653537","https://openalex.org/W3026458074","https://openalex.org/W3029165865","https://openalex.org/W3035316078","https://openalex.org/W3037567775","https://openalex.org/W3043343545","https://openalex.org/W3043801304","https://openalex.org/W3048077125","https://openalex.org/W3048372515","https://openalex.org/W3082767334","https://openalex.org/W3092775865","https://openalex.org/W3092906216","https://openalex.org/W3093122931","https://openalex.org/W3094715874","https://openalex.org/W3095707326","https://openalex.org/W3096034082","https://openalex.org/W3108984808","https://openalex.org/W3115686843","https://openalex.org/W3118682159","https://openalex.org/W3119479239","https://openalex.org/W3143197440","https://openalex.org/W3171615848","https://openalex.org/W3172514680","https://openalex.org/W4238155858","https://openalex.org/W4247924304","https://openalex.org/W4249142012","https://openalex.org/W4288079578","https://openalex.org/W4297606427","https://openalex.org/W4297730607","https://openalex.org/W4301206121","https://openalex.org/W4320013936","https://openalex.org/W4385245566","https://openalex.org/W6601052344"],"related_works":["https://openalex.org/W3177930984","https://openalex.org/W2052697133","https://openalex.org/W2076896210","https://openalex.org/W1606474234","https://openalex.org/W2034539438","https://openalex.org/W2384288472","https://openalex.org/W1539573266","https://openalex.org/W79946180","https://openalex.org/W2028063938","https://openalex.org/W2385736320"],"abstract_inverted_index":{"n":[0],"recent":[1,117],"years,":[2],"text-guided":[3],"image":[4,21,42,68,106,139],"manipulation":[5],"has":[6,23],"gained":[7],"increasing":[8],"attention":[9],"in":[10,73],"the":[11,55,60,81,105,111],"multimedia":[12],"and":[13,69,131,134],"computer":[14],"vision":[15],"community.":[16],"The":[17,57,92],"input":[18],"to":[19,27,39,50,80,88,96,102,142],"conditional":[20],"generation":[22],"evolved":[24],"from":[25],"image-only":[26],"multimodality.":[28],"In":[29],"this":[30,90],"paper,":[31],"we":[32],"study":[33],"a":[34,66,85,138],"setting":[35],"that":[36,76,110],"allows":[37],"users":[38],"edit":[40],"an":[41,71],"with":[43],"multiple":[44],"objects":[45],"using":[46],"complex":[47],"text":[48,98],"instructions":[49],"add,":[51],"remove,":[52],"or":[53],"change":[54],"objects.":[56],"inputs":[58],"of":[59,128],"task":[61],"are":[62],"multimodal":[63],"including":[64],"(1)":[65],"reference":[67],"(2)":[70],"instruction":[72],"natural":[74],"language":[75],"describes":[77],"desired":[78],"modifications":[79],"image.":[82],"We":[83,108],"propose":[84],"GAN-based":[86],"method":[87],"tackle":[89],"problem.":[91],"key":[93],"idea":[94],"is":[95],"treat":[97],"as":[99,137],"neural":[100],"operators":[101],"locally":[103],"modify":[104],"feature.":[107],"show":[109],"proposed":[112],"model":[113],"performs":[114],"favorably":[115],"against":[116],"strong":[118],"baselines":[119],"on":[120],"three":[121],"public":[122],"datasets.":[123],"Specifically,":[124],"it":[125],"generates":[126],"images":[127],"greater":[129],"fidelity":[130],"semantic":[132],"relevance,":[133],"when":[135],"used":[136],"query,":[140],"leads":[141],"better":[143],"retrieval":[144],"performance.":[145]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":4}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
