{"id":"https://openalex.org/W4213455776","doi":"https://doi.org/10.1109/tmm.2022.3154154","title":"Learning by Imagination: A Joint Framework for Text-Based Image Manipulation and Change Captioning","display_name":"Learning by Imagination: A Joint Framework for Text-Based Image Manipulation and Change Captioning","publication_year":2022,"publication_date":"2022-02-24","ids":{"openalex":"https://openalex.org/W4213455776","doi":"https://doi.org/10.1109/tmm.2022.3154154"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2022.3154154","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3154154","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073743493","display_name":"Kenan E. Ak","orcid":"https://orcid.org/0000-0001-5863-3685"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Kenan E. Ak","raw_affiliation_strings":["Visual Intelligence, Institute for Infocomm Research, Singapore"],"affiliations":[{"raw_affiliation_string":"Visual Intelligence, Institute for Infocomm Research, Singapore","institution_ids":["https://openalex.org/I3005327000"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100649794","display_name":"Ying Sun","orcid":"https://orcid.org/0000-0002-7224-6726"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ying Sun","raw_affiliation_strings":["Visual Intelligence, Institute for Infocomm Research, Singapore"],"affiliations":[{"raw_affiliation_string":"Visual Intelligence, Institute for Infocomm Research, Singapore","institution_ids":["https://openalex.org/I3005327000"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077258552","display_name":"Joo\u2010Hwee Lim","orcid":"https://orcid.org/0000-0002-4103-3824"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Joo Hwee Lim","raw_affiliation_strings":["Visual Intelligence, Institute for Infocomm Research, Singapore"],"affiliations":[{"raw_affiliation_string":"Visual Intelligence, Institute for Infocomm Research, Singapore","institution_ids":["https://openalex.org/I3005327000"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5073743493"],"corresponding_institution_ids":["https://openalex.org/I3005327000"],"apc_list":null,"apc_paid":null,"fwci":1.7129,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.85267807,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"25","issue":null,"first_page":"3006","last_page":"3016"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9853000044822693,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.948228120803833},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8737105131149292},{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.6342807412147522},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5549554824829102},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.5136613249778748},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4714157283306122},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4648747742176056},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.4585530459880829},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.43622562289237976},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3951405882835388}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.948228120803833},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8737105131149292},{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.6342807412147522},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5549554824829102},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.5136613249778748},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4714157283306122},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4648747742176056},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.4585530459880829},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.43622562289237976},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3951405882835388},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2022.3154154","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3154154","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7099999785423279,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320696","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":94,"referenced_works":["https://openalex.org/W648786980","https://openalex.org/W1514535095","https://openalex.org/W1522301498","https://openalex.org/W1686810756","https://openalex.org/W1797268635","https://openalex.org/W1811254738","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1956340063","https://openalex.org/W2064675550","https://openalex.org/W2101105183","https://openalex.org/W2108598243","https://openalex.org/W2123229215","https://openalex.org/W2123301721","https://openalex.org/W2150856297","https://openalex.org/W2154652894","https://openalex.org/W2155027007","https://openalex.org/W2160544350","https://openalex.org/W2170881581","https://openalex.org/W2194775991","https://openalex.org/W2203575840","https://openalex.org/W2405756170","https://openalex.org/W2506483933","https://openalex.org/W2561715562","https://openalex.org/W2592101326","https://openalex.org/W2618530766","https://openalex.org/W2745461083","https://openalex.org/W2766736793","https://openalex.org/W2785678896","https://openalex.org/W2798951647","https://openalex.org/W2802650881","https://openalex.org/W2803525166","https://openalex.org/W2883861033","https://openalex.org/W2884870985","https://openalex.org/W2885013662","https://openalex.org/W2885537606","https://openalex.org/W2905145027","https://openalex.org/W2905544595","https://openalex.org/W2948358897","https://openalex.org/W2950404765","https://openalex.org/W2951183276","https://openalex.org/W2962793481","https://openalex.org/W2962845008","https://openalex.org/W2963084599","https://openalex.org/W2963163163","https://openalex.org/W2963177403","https://openalex.org/W2963224792","https://openalex.org/W2963351113","https://openalex.org/W2963767194","https://openalex.org/W2963966654","https://openalex.org/W2964024144","https://openalex.org/W2964050021","https://openalex.org/W2964196083","https://openalex.org/W2964313012","https://openalex.org/W2964318046","https://openalex.org/W2970067499","https://openalex.org/W2970562079","https://openalex.org/W2979382951","https://openalex.org/W2984809863","https://openalex.org/W2988981892","https://openalex.org/W2993158499","https://openalex.org/W3010257550","https://openalex.org/W3012404734","https://openalex.org/W3016151052","https://openalex.org/W3019301826","https://openalex.org/W3035316078","https://openalex.org/W3048835936","https://openalex.org/W3089767655","https://openalex.org/W3101313921","https://openalex.org/W3108170342","https://openalex.org/W3132016729","https://openalex.org/W3175933895","https://openalex.org/W3176470992","https://openalex.org/W3196229461","https://openalex.org/W4300838842","https://openalex.org/W4320013936","https://openalex.org/W6621543089","https://openalex.org/W6630875275","https://openalex.org/W6631190155","https://openalex.org/W6637373629","https://openalex.org/W6638319203","https://openalex.org/W6638742206","https://openalex.org/W6678262379","https://openalex.org/W6682631176","https://openalex.org/W6683204974","https://openalex.org/W6713645886","https://openalex.org/W6725318829","https://openalex.org/W6728889164","https://openalex.org/W6734373134","https://openalex.org/W6748582592","https://openalex.org/W6755102824","https://openalex.org/W6767137312","https://openalex.org/W6779841522","https://openalex.org/W6782521812"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2949522393","https://openalex.org/W4289422896"],"abstract_inverted_index":{"Image":[0],"and":[1,20,43,59,88,122,134,221,246],"text":[2,14,174],"are":[3],"dual":[4],"modalities":[5],"of":[6,47,67,85,189,244],"our":[7,211,231],"semantic":[8],"interpretation.":[9],"Changing":[10],"images":[11,110],"based":[12],"on":[13,73,217],"descriptions":[15],"allows":[16],"us":[17],"to":[18,51,137,240],"imagine":[19],"visualize":[21],"the":[22,45,64,103,113,150,153,158,165,171,187,190,197,204,227,236,241],"world":[23],"(a.k.a.":[24],"text-based":[25],"image":[26,155],"manipulation":[27],"(TIM)).":[28],"In":[29,149],"this":[30,180,234],"paper,":[31],"we":[32,92,126,185,194,222],"introduce":[33],"a":[34,57,128,146],"framework":[35,212],"that":[36,94,210],"combines":[37],"TIM":[38,68,133,159,191,198,215,245],"with":[39,164],"change":[40],"captioning":[41],"(CC)":[42],"utilizes":[44],"benefits":[46],"co-training.":[48],"CC":[49,104,114,135,166,205,228,247],"aims":[50],"describe":[52],"what":[53],"has":[54],"changed":[55],"in":[56,132,226],"scene":[58],"can":[60,79,99,116,161,176],"be":[61,80,117,162,177],"regarded":[62,81],"as":[63,82,111,119,142,144,200],"inverse":[65],"version":[66],"where":[69],"both":[70],"tasks":[71],"rely":[72],"generative":[74,77],"networks.":[75],"These":[76],"networks":[78],"data":[83],"producers":[84],"each":[86],"other":[87],"unlike":[89],"previous":[90],"methods,":[91],"discover":[93],"integrating":[95],"their":[96],"learning":[97],"procedures":[98],"benefit":[100],"both.":[101],"Since":[102],"module":[105,115,160,199],"describes":[106],"differences":[107],"between":[108],"two":[109],"text,":[112],"used":[118],"evaluation":[120],"criteria":[121],"provide":[123],"feedback.":[124],"Furthermore,":[125],"utilize":[127,196],"shared":[129],"attention":[130],"mechanism":[131],"modules":[136],"localize":[138],"towards":[139],"prominent":[140],"regions":[141],"well":[143],"enabling":[145],"change-aware":[147],"discriminator.":[148],"opposite":[151],"direction,":[152],"output":[154],"synthesized":[156],"by":[157,168],"assessed":[163],"module,":[167,192],"checking":[169],"whether":[170],"ground":[172],"truth":[173],"description":[175],"redescribed.":[178],"Following":[179],"insight,":[181],"not":[182],"only":[183],"do":[184],"boost":[186],"training":[188,243],"but":[193],"also":[195],"additional":[201],"supervision":[202],"for":[203],"training.":[206],"Experimental":[207],"results":[208],"show":[209],"outperforms":[213],"existing":[214],"methods":[216],"several":[218],"datasets":[219],"substantially":[220],"achieve":[223],"marginal":[224],"improvements":[225],"module.":[229],"To":[230],"best":[232],"knowledge,":[233],"is":[235],"first":[237],"study":[238],"dedicated":[239],"joint":[242],"tasks.":[248]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
