{"id":"https://openalex.org/W2808138519","doi":"https://doi.org/10.24963/ijcai.2018/84","title":"Show, Observe and Tell: Attribute-driven Attention Model for Image Captioning","display_name":"Show, Observe and Tell: Attribute-driven Attention Model for Image Captioning","publication_year":2018,"publication_date":"2018-07-01","ids":{"openalex":"https://openalex.org/W2808138519","doi":"https://doi.org/10.24963/ijcai.2018/84","mag":"2808138519"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2018/84","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2018/84","pdf_url":"https://www.ijcai.org/proceedings/2018/0084.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2018/0084.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100334151","display_name":"Hui Chen","orcid":"https://orcid.org/0000-0001-9326-6649"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I29955533","display_name":"Center for Information Technology","ror":"https://ror.org/03jh5a977","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I29955533"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Hui Chen","raw_affiliation_strings":["Beijing National Research Center for Information Science and Technology(BNRist)","School of Software, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology(BNRist)","institution_ids":["https://openalex.org/I29955533"]},{"raw_affiliation_string":"School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057732142","display_name":"Guiguang Ding","orcid":"https://orcid.org/0000-0003-0137-9975"},"institutions":[{"id":"https://openalex.org/I29955533","display_name":"Center for Information Technology","ror":"https://ror.org/03jh5a977","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I29955533"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Guiguang Ding","raw_affiliation_strings":["Beijing National Research Center for Information Science and Technology(BNRist)","School of Software, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology(BNRist)","institution_ids":["https://openalex.org/I29955533"]},{"raw_affiliation_string":"School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077882811","display_name":"Zijia Lin","orcid":"https://orcid.org/0000-0002-1390-7424"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zijia Lin","raw_affiliation_strings":["Microsoft Research, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051149140","display_name":"Sicheng Zhao","orcid":"https://orcid.org/0000-0001-5843-6411"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I29955533","display_name":"Center for Information Technology","ror":"https://ror.org/03jh5a977","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I29955533"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Sicheng Zhao","raw_affiliation_strings":["Beijing National Research Center for Information Science and Technology(BNRist)","School of Software, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology(BNRist)","institution_ids":["https://openalex.org/I29955533"]},{"raw_affiliation_string":"School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046605531","display_name":"Jungong Han","orcid":"https://orcid.org/0000-0003-4361-956X"},"institutions":[{"id":"https://openalex.org/I67415387","display_name":"Lancaster University","ror":"https://ror.org/04f2nsd36","country_code":"GB","type":"education","lineage":["https://openalex.org/I67415387"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jungong Han","raw_affiliation_strings":["School of Computing & Communications, Lancaster University, UK"],"affiliations":[{"raw_affiliation_string":"School of Computing & Communications, Lancaster University, UK","institution_ids":["https://openalex.org/I67415387"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100334151"],"corresponding_institution_ids":["https://openalex.org/I29955533","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":4.7813,"has_fulltext":false,"cited_by_count":63,"citation_normalized_percentile":{"value":0.96417095,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"606","last_page":"612"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.995259165763855},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8031722903251648},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7121473550796509},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6748225092887878},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6047388315200806},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5808819532394409},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5688221454620361},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5432958602905273},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.45747020840644836},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4533299207687378},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3924373388290405},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3887181878089905},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.37634822726249695}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.995259165763855},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8031722903251648},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7121473550796509},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6748225092887878},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6047388315200806},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5808819532394409},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5688221454620361},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5432958602905273},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.45747020840644836},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4533299207687378},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3924373388290405},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3887181878089905},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37634822726249695},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2018/84","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2018/84","pdf_url":"https://www.ijcai.org/proceedings/2018/0084.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2018/84","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2018/84","pdf_url":"https://www.ijcai.org/proceedings/2018/0084.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G187184461","display_name":null,"funder_award_id":"61571269","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2311577451","display_name":null,"funder_award_id":"No. 61571269","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3770199858","display_name":null,"funder_award_id":"2017M610897","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G5574925684","display_name":null,"funder_award_id":"61701273","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5612697103","display_name":null,"funder_award_id":"2017M","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2808138519.pdf","grobid_xml":"https://content.openalex.org/works/W2808138519.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1514535095","https://openalex.org/W1811254738","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1931639407","https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2150824314","https://openalex.org/W2194775991","https://openalex.org/W2220981600","https://openalex.org/W2302086703","https://openalex.org/W2407414618","https://openalex.org/W2549365021","https://openalex.org/W2550553598","https://openalex.org/W2552161745","https://openalex.org/W2558834163","https://openalex.org/W2575842049","https://openalex.org/W2604522653","https://openalex.org/W2737766105","https://openalex.org/W2962706528","https://openalex.org/W2963084599","https://openalex.org/W2963745697","https://openalex.org/W2964018924"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2949522393","https://openalex.org/W2993670781"],"abstract_inverted_index":{"Despite":[0],"the":[1,27,41,57,72,81,97,103,109,128,137,145,156,161,165],"fact":[2],"that":[3,118],"attribute-based":[4,19],"approaches":[5,8,20,168],"and":[6,120,143,158,173],"attention-based":[7,36,129],"have":[9],"been":[10],"proven":[11],"to":[12,56,107,150],"be":[13,52,87,124],"effective":[14],"in":[15,47,54,96],"image":[16,58,78,110,178],"captioning,":[17,79],"most":[18,35],"simply":[21],"predict":[22],"attributes":[23,31,85],"independently":[24],"without":[25],"taking":[26],"co-occurrence":[28,82],"dependencies":[29,83],"among":[30,84],"into":[32],"account.":[33],"Besides,":[34],"captioning":[37,130,146,167,179],"models":[38],"directly":[39],"leverage":[40],"feature":[42,139],"map":[43],"extracted":[44],"from":[45],"CNN,":[46],"which":[48],"many":[49],"features":[50,122],"may":[51],"redundant":[53],"relation":[55],"content.":[59],"In":[60],"this":[61,133],"paper,":[62],"we":[63,135],"focus":[64],"on":[65,175],"training":[66],"a":[67,100],"good":[68],"attribute-inference":[69],"model":[70,94,147],"via":[71],"recurrent":[73],"neural":[74],"network":[75],"(RNN)":[76],"for":[77,127,140],"where":[80],"can":[86],"maintained.":[88],"The":[89],"uniqueness":[90],"of":[91,99,160],"our":[92],"inference":[93],"lies":[95],"usage":[98],"RNN":[101],"with":[102],"visual":[104],"attention":[105],"mechanism":[106],"\\textit{observe}":[108],"before":[111],"generating":[112],"captions.":[113],"Additionally,":[114],"it":[115],"is":[116],"noticed":[117],"compact":[119],"attribute-driven":[121],"will":[123],"more":[125],"useful":[126],"model.":[131],"To":[132],"end,":[134],"extract":[136],"context":[138,152],"each":[141],"attribute,":[142],"guide":[144],"adaptively":[148],"attend":[149],"these":[151],"features.":[153],"We":[154],"verify":[155],"effectiveness":[157],"superiority":[159],"proposed":[162],"approach":[163],"over":[164],"other":[166],"by":[169],"conducting":[170],"massive":[171],"experiments":[172],"comparisons":[174],"MS":[176],"COCO":[177],"dataset.":[180]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":21},{"year":2019,"cited_by_count":11},{"year":2018,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
