{"id":"https://openalex.org/W3035463379","doi":"https://doi.org/10.24963/ijcai.2020/107","title":"Non-Autoregressive Image Captioning with Counterfactuals-Critical Multi-Agent Learning","display_name":"Non-Autoregressive Image Captioning with Counterfactuals-Critical Multi-Agent Learning","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3035463379","doi":"https://doi.org/10.24963/ijcai.2020/107","mag":"3035463379"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2020/107","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/107","pdf_url":"https://www.ijcai.org/proceedings/2020/0107.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2020/0107.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040424302","display_name":"Longteng Guo","orcid":"https://orcid.org/0000-0002-4340-4000"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Longteng Guo","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences","School of Artificial Intelligence, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108392430","display_name":"Jing Liu","orcid":"https://orcid.org/0000-0003-0903-9131"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Liu","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103047969","display_name":"Xinxin Zhu","orcid":"https://orcid.org/0000-0002-2142-5580"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinxin Zhu","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101777772","display_name":"Xingjian He","orcid":"https://orcid.org/0000-0001-5396-6253"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingjian He","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences","School of Artificial Intelligence, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079667048","display_name":"Jie Jiang","orcid":"https://orcid.org/0000-0001-9666-815X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Jiang","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences","School of Artificial Intelligence, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100511737","display_name":"Hanqing Lu","orcid":"https://orcid.org/0000-0001-9506-3407"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hanqing Lu","raw_affiliation_strings":["National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"National Laboratory of Pattern Recognition, Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5040424302"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":2.9441,"has_fulltext":false,"cited_by_count":48,"citation_normalized_percentile":{"value":0.92664976,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"767","last_page":"773"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9132977724075317},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.8286860585212708},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.815392255783081},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6044634580612183},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.511725902557373},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.46352750062942505},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.457914263010025},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.4548620879650116},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.43379899859428406},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3945396840572357},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35076814889907837},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.25339940190315247},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.08391749858856201},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08276337385177612}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9132977724075317},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.8286860585212708},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.815392255783081},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6044634580612183},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.511725902557373},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.46352750062942505},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.457914263010025},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.4548620879650116},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43379899859428406},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3945396840572357},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35076814889907837},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.25339940190315247},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.08391749858856201},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08276337385177612},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2020/107","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/107","pdf_url":"https://www.ijcai.org/proceedings/2020/0107.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2020/107","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/107","pdf_url":"https://www.ijcai.org/proceedings/2020/0107.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.4099999964237213,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1117215680","display_name":null,"funder_award_id":"61872366","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6058138561","display_name":null,"funder_award_id":", No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7767465607","display_name":null,"funder_award_id":"61872364","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8706534421","display_name":null,"funder_award_id":"61922086","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8863666567","display_name":null,"funder_award_id":"and No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3035463379.pdf","grobid_xml":"https://content.openalex.org/works/W3035463379.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W206679605","https://openalex.org/W1522301498","https://openalex.org/W1889081078","https://openalex.org/W2122763142","https://openalex.org/W2156718681","https://openalex.org/W2463955103","https://openalex.org/W2617547828","https://openalex.org/W2737766105","https://openalex.org/W2767206889","https://openalex.org/W2947912915","https://openalex.org/W2955956881","https://openalex.org/W2962969034","https://openalex.org/W2963434219","https://openalex.org/W2963536265","https://openalex.org/W2963736842","https://openalex.org/W2987123286","https://openalex.org/W2990818246","https://openalex.org/W2996677910","https://openalex.org/W3103651098","https://openalex.org/W4214717370","https://openalex.org/W4230563027","https://openalex.org/W4288329833","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2949522393","https://openalex.org/W3217195652"],"abstract_inverted_index":{"Most":[0],"image":[1,142],"captioning":[2,136,143],"models":[3,47],"are":[4,114],"autoregressive,":[5],"i.e.":[6],"they":[7],"generate":[8],"each":[9,55],"word":[10,56],"by":[11,39],"conditioning":[12],"on":[13,140],"previously":[14],"generated":[15],"words,":[16],"which":[17],"leads":[18],"to":[19,33,53,64,120,129,134,154],"heavy":[20],"latency":[21],"during":[22],"inference.":[23],"Recently,":[24],"non-autoregressive":[25,77],"decoding":[26,161],"has":[27],"been":[28],"proposed":[29],"in":[30,43,71,110],"machine":[31],"translation":[32],"speed":[34],"up":[35],"the":[36,49,66,111],"inference":[37],"time":[38],"generating":[40],"all":[41],"words":[42],"parallel.":[44],"Typically,":[45],"these":[46,76],"use":[48],"word-level":[50],"cross-entropy":[51],"loss":[52],"optimize":[54],"independently.":[57],"However,":[58],"such":[59],"a":[60,84,91,103,123,151],"learning":[61,106],"process":[62],"fails":[63],"consider":[65],"sentence-level":[67,124],"consistency,":[68],"thus":[69],"resulting":[70],"inferior":[72],"generation":[73],"quality":[74],"of":[75],"models.":[78],"In":[79],"this":[80],"paper,":[81],"we":[82,127],"propose":[83,128],"Non-Autoregressive":[85],"Image":[86],"Captioning":[87],"(NAIC)":[88],"model":[89,149],"with":[90],"novel":[92],"training":[93],"paradigm:":[94],"Counterfactuals-critical":[95],"Multi-Agent":[96],"Learning":[97],"(CMAL).":[98],"CMAL":[99],"formulates":[100],"NAIC":[101,148],"as":[102,116],"multi-agent":[104],"reinforcement":[105],"system":[107],"where":[108],"positions":[109],"target":[112],"sequence":[113],"viewed":[115],"agents":[117],"that":[118,146],"learn":[119],"cooperatively":[121],"maximize":[122],"reward.":[125],"Besides,":[126],"utilize":[130],"massive":[131],"unlabeled":[132],"images":[133],"boost":[135],"performance.":[137],"Extensive":[138],"experiments":[139],"MSCOCO":[141],"benchmark":[144],"show":[145],"our":[147],"achieves":[150],"performance":[152],"comparable":[153],"state-of-the-art":[155],"autoregressive":[156],"models,":[157],"while":[158],"brings":[159],"13.9x":[160],"speedup.":[162]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":16},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":7}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
