{"id":"https://openalex.org/W2966162142","doi":"https://doi.org/10.1109/tcsvt.2020.2965966","title":"Cascaded Revision Network for Novel Object Captioning","display_name":"Cascaded Revision Network for Novel Object Captioning","publication_year":2020,"publication_date":"2020-01-14","ids":{"openalex":"https://openalex.org/W2966162142","doi":"https://doi.org/10.1109/tcsvt.2020.2965966","mag":"2966162142"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2020.2965966","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2020.2965966","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1908.02726","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007537121","display_name":"Qianyu Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Qianyu Feng","raw_affiliation_strings":["Centre for Artificial Intelligence, University of Technology Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"Centre for Artificial Intelligence, University of Technology Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100324098","display_name":"Yu Wu","orcid":"https://orcid.org/0000-0002-1680-8253"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yu Wu","raw_affiliation_strings":["Centre for Artificial Intelligence, University of Technology Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"Centre for Artificial Intelligence, University of Technology Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002207978","display_name":"Hehe Fan","orcid":"https://orcid.org/0000-0001-9572-2345"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Hehe Fan","raw_affiliation_strings":["Centre for Artificial Intelligence, University of Technology Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"Centre for Artificial Intelligence, University of Technology Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054311881","display_name":"Chenggang Yan","orcid":"https://orcid.org/0000-0003-1204-0512"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenggang Yan","raw_affiliation_strings":["Hangzhou Dianzi University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081346568","display_name":"Mingliang Xu","orcid":"https://orcid.org/0000-0002-6885-3451"},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingliang Xu","raw_affiliation_strings":["School of Information Engineering, Zhengzhou University, Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Zhengzhou University, Zhengzhou, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005421447","display_name":"Yi Yang","orcid":"https://orcid.org/0000-0002-0512-880X"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yi Yang","raw_affiliation_strings":["Centre for Artificial Intelligence, University of Technology Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"Centre for Artificial Intelligence, University of Technology Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I114017466"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5007537121"],"corresponding_institution_ids":["https://openalex.org/I114017466"],"apc_list":null,"apc_paid":null,"fwci":3.4339,"has_fulltext":false,"cited_by_count":42,"citation_normalized_percentile":{"value":0.9383238,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"30","issue":"10","first_page":"3413","last_page":"3421"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9862526655197144},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8504506349563599},{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.7987973690032959},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6591943502426147},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6567881107330322},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.6199243664741516},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.545535147190094},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5422216057777405},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.521284818649292},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5067477822303772},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4855491816997528},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.48488861322402954},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.47224682569503784},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.46236613392829895},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.45882344245910645},{"id":"https://openalex.org/keywords/domain-knowledge","display_name":"Domain knowledge","score":0.43416982889175415},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3719854950904846},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3470829129219055},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.2652609348297119}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9862526655197144},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8504506349563599},{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.7987973690032959},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6591943502426147},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6567881107330322},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.6199243664741516},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.545535147190094},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5422216057777405},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.521284818649292},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5067477822303772},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4855491816997528},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.48488861322402954},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.47224682569503784},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.46236613392829895},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.45882344245910645},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.43416982889175415},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3719854950904846},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3470829129219055},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2652609348297119},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tcsvt.2020.2965966","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2020.2965966","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1908.02726","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1908.02726","pdf_url":"https://arxiv.org/pdf/1908.02726","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:opus.lib.uts.edu.au:10453/139095","is_oa":false,"landing_page_url":"http://hdl.handle.net/10453/139095","pdf_url":null,"source":{"id":"https://openalex.org/S4306401357","display_name":"UTS ePRESS (University of Technology Sydney)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114017466","host_organization_name":"University of Technology Sydney","host_organization_lineage":["https://openalex.org/I114017466"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1908.02726","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1908.02726","pdf_url":"https://arxiv.org/pdf/1908.02726","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.5699999928474426,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1514535095","https://openalex.org/W1686810756","https://openalex.org/W1811254738","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1931639407","https://openalex.org/W1947481528","https://openalex.org/W1987835821","https://openalex.org/W2064675550","https://openalex.org/W2071680411","https://openalex.org/W2108598243","https://openalex.org/W2109586012","https://openalex.org/W2123024445","https://openalex.org/W2128532956","https://openalex.org/W2133512280","https://openalex.org/W2139380585","https://openalex.org/W2157331557","https://openalex.org/W2173180041","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2277195237","https://openalex.org/W2336589871","https://openalex.org/W2464754550","https://openalex.org/W2481240925","https://openalex.org/W2508497007","https://openalex.org/W2557728737","https://openalex.org/W2560335610","https://openalex.org/W2604729005","https://openalex.org/W2735159761","https://openalex.org/W2736590139","https://openalex.org/W2737766105","https://openalex.org/W2745461083","https://openalex.org/W2766046458","https://openalex.org/W2779610669","https://openalex.org/W2795151422","https://openalex.org/W2797733588","https://openalex.org/W2798593490","https://openalex.org/W2808138519","https://openalex.org/W2808663243","https://openalex.org/W2922521335","https://openalex.org/W2950178297","https://openalex.org/W2953158660","https://openalex.org/W2962706528","https://openalex.org/W2962835968","https://openalex.org/W2962935746","https://openalex.org/W2963088515","https://openalex.org/W2963175879","https://openalex.org/W2963327853","https://openalex.org/W2963499153","https://openalex.org/W2963758027","https://openalex.org/W2963877622","https://openalex.org/W3000029975","https://openalex.org/W3100115227","https://openalex.org/W6630875275","https://openalex.org/W6637373629","https://openalex.org/W6638742206","https://openalex.org/W6639102338","https://openalex.org/W6676497082","https://openalex.org/W6678470764","https://openalex.org/W6730028961","https://openalex.org/W6736419893"],"related_works":["https://openalex.org/W2169518243","https://openalex.org/W2252095989","https://openalex.org/W4322096525","https://openalex.org/W2551914602","https://openalex.org/W4281893144","https://openalex.org/W2105076537","https://openalex.org/W2787311093","https://openalex.org/W2084531783","https://openalex.org/W2902731467","https://openalex.org/W2020757772"],"abstract_inverted_index":{"Image":[0],"captioning,":[1],"a":[2,35,61,129,136,160,184],"challenging":[3],"task":[4],"where":[5],"the":[6,23,48,53,72,76,91,99,104,114,124,151,174,179,191,197,216,223,234],"machine":[7],"automatically":[8],"describes":[9],"an":[10,88],"image":[11,89,115],"with":[12,78,116,211,219],"natural":[13],"language,":[14],"has":[15],"drawn":[16],"significant":[17],"attention":[18],"in":[19,196],"recent":[20,27],"years.":[21],"Despite":[22],"remarkable":[24],"improvements":[25],"of":[26,38,101,131,236],"approaches,":[28],"however,":[29],"these":[30],"methods":[31],"are":[32,144],"built":[33],"upon":[34],"large":[36],"set":[37],"training":[39],"image-sentence":[40],"pairs.":[41],"The":[42],"expensive":[43],"labor":[44],"efforts":[45],"hence":[46],"limit":[47],"captioning":[49,126],"model":[50,77,164],"to":[51,86,98,122,139,147,188,231],"describe":[52,87,209],"wider":[54],"world.":[55],"In":[56,178],"this":[57,201],"paper,":[58],"we":[59,155,182],"present":[60],"novel":[62,192,202],"network":[63],"structure,":[64],"Cascaded":[65],"Revision":[66],"Network,":[67],"which":[68,142],"aims":[69],"at":[70],"relieving":[71],"problem":[73],"by":[74,128,173],"equipping":[75],"out-of-domain":[79,102],"knowledge.":[80,96],"CRN":[81,206],"first":[82],"tries":[83],"its":[84],"best":[85],"using":[90],"existing":[92],"vocabulary":[93],"from":[94,159,170],"in-domain":[95],"Due":[97],"lack":[100],"knowledge,":[103],"caption":[105],"may":[106],"be":[107,148],"inaccurate":[108,149],"or":[109],"include":[110],"ambiguous":[111],"words":[112,143,169],"for":[113],"unknown":[117],"(novel)":[118],"objects.":[119,213],"We":[120,134,214],"propose":[121],"re-edit":[123],"primary":[125],"sentence":[127],"series":[130],"cascaded":[132,203],"operations.":[133],"introduce":[135],"perplexity":[137],"predictor":[138],"find":[140],"out":[141],"most":[145],"likely":[146],"given":[150],"input":[152],"image.":[153],"Thereafter,":[154],"utilize":[156],"external":[157],"knowledge":[158],"pretrained":[161],"object":[162,193],"detection":[163,171],"and":[165],"select":[166],"more":[167],"accurate":[168],"results":[172],"visual":[175],"matching":[176,186],"module.":[177],"last":[180],"step,":[181],"design":[183],"semantic":[185],"module":[187],"ensure":[189],"that":[190],"is":[194],"fit":[195],"right":[198],"position.":[199],"By":[200],"captioning-revising":[204],"mechanism,":[205],"can":[207],"accurately":[208],"images":[210],"unseen":[212],"validate":[215],"proposed":[217],"method":[218],"state-of-the-art":[220],"performance":[221],"on":[222],"held-out":[224],"MSCOCO":[225],"dataset":[226],"as":[227,229],"well":[228],"scale":[230],"ImageNet,":[232],"demonstrating":[233],"effectiveness":[235],"our":[237],"method.":[238]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":12}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
