{"id":"https://openalex.org/W2560207749","doi":"https://doi.org/10.18653/v1/d17-1098","title":"Guided Open Vocabulary Image Captioning with Constrained Beam Search","display_name":"Guided Open Vocabulary Image Captioning with Constrained Beam Search","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2560207749","doi":"https://doi.org/10.18653/v1/d17-1098","mag":"2560207749"},"language":"en","primary_location":{"id":"doi:10.18653/v1/d17-1098","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d17-1098","pdf_url":"https://www.aclweb.org/anthology/D17-1098.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 Conference on Empirical Methods in Natural\n          Language Processing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/D17-1098.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020091255","display_name":"Peter Anderson","orcid":"https://orcid.org/0000-0002-6359-8586"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Peter Anderson","raw_affiliation_strings":["The Australian National University, Canberra, Australia","AUSTRALIAN NATIONAL UNIVERSITY"],"affiliations":[{"raw_affiliation_string":"The Australian National University, Canberra, Australia","institution_ids":["https://openalex.org/I118347636"]},{"raw_affiliation_string":"AUSTRALIAN NATIONAL UNIVERSITY","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090467618","display_name":"Basura Fernando","orcid":"https://orcid.org/0000-0002-6920-9916"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Basura Fernando","raw_affiliation_strings":["The Australian National University, Canberra, Australia","AUSTRALIAN NATIONAL UNIVERSITY"],"affiliations":[{"raw_affiliation_string":"The Australian National University, Canberra, Australia","institution_ids":["https://openalex.org/I118347636"]},{"raw_affiliation_string":"AUSTRALIAN NATIONAL UNIVERSITY","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034461489","display_name":"Mark Johnson","orcid":"https://orcid.org/0000-0003-4809-8441"},"institutions":[{"id":"https://openalex.org/I99043593","display_name":"Macquarie University","ror":"https://ror.org/01sf06y89","country_code":"AU","type":"education","lineage":["https://openalex.org/I99043593"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Mark Johnson","raw_affiliation_strings":["Macquarie University, Sydney, Australia","Department of Computing"],"affiliations":[{"raw_affiliation_string":"Macquarie University, Sydney, Australia","institution_ids":["https://openalex.org/I99043593"]},{"raw_affiliation_string":"Department of Computing","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071663279","display_name":"Stephen Jay Gould","orcid":"https://orcid.org/0000-0001-8929-7899"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Stephen Gould","raw_affiliation_strings":["The Australian National University, Canberra, Australia","AUSTRALIAN NATIONAL UNIVERSITY"],"affiliations":[{"raw_affiliation_string":"The Australian National University, Canberra, Australia","institution_ids":["https://openalex.org/I118347636"]},{"raw_affiliation_string":"AUSTRALIAN NATIONAL UNIVERSITY","institution_ids":["https://openalex.org/I118347636"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5020091255"],"corresponding_institution_ids":["https://openalex.org/I118347636"],"apc_list":null,"apc_paid":null,"fwci":0.4621,"has_fulltext":true,"cited_by_count":17,"citation_normalized_percentile":{"value":0.71575089,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"936","last_page":"945"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9702000021934509,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9941662549972534},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8289287090301514},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.7699695825576782},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.6879341006278992},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.675747275352478},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6755508184432983},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6247137188911438},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.49644023180007935},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3933376967906952},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3817240595817566},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0650084912776947},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.05808541178703308}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9941662549972534},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8289287090301514},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.7699695825576782},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.6879341006278992},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.675747275352478},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6755508184432983},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6247137188911438},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.49644023180007935},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3933376967906952},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3817240595817566},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0650084912776947},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.05808541178703308},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.18653/v1/d17-1098","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d17-1098","pdf_url":"https://www.aclweb.org/anthology/D17-1098.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 Conference on Empirical Methods in Natural\n          Language Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1612.00576","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1612.00576","pdf_url":"https://arxiv.org/pdf/1612.00576","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:openresearch-repository.anu.edu.au:1885/294445","is_oa":true,"landing_page_url":"http://hdl.handle.net/1885/294445","pdf_url":"https://openresearch-repository.anu.edu.au/bitstreams/342f0651-520b-44e2-a49b-53c328b0913a/download","source":{"id":"https://openalex.org/S4306402539","display_name":"ANU Open Research (Australian National University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I118347636","host_organization_name":"Australian National University","host_organization_lineage":["https://openalex.org/I118347636"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the Conference on Empirical Methods in Natural Language Processing, EMNLP2017","raw_type":"Conference paper"},{"id":"doi:10.48550/arxiv.1612.00576","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1612.00576","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:2560207749","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"doi:10.18653/v1/d17-1098","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d17-1098","pdf_url":"https://www.aclweb.org/anthology/D17-1098.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 Conference on Empirical Methods in Natural\n          Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.699999988079071}],"awards":[{"id":"https://openalex.org/G3483111202","display_name":null,"funder_award_id":"CE14010001","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"},{"id":"https://openalex.org/G3800762947","display_name":null,"funder_award_id":"CE140100016","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"},{"id":"https://openalex.org/G4327185696","display_name":null,"funder_award_id":"CE140100016","funder_id":"https://openalex.org/F4320331724","funder_display_name":"Australian Centre for Robotic Vision"}],"funders":[{"id":"https://openalex.org/F4320315885","display_name":"Australian Government","ror":"https://ror.org/0314h5y94"},{"id":"https://openalex.org/F4320331724","display_name":"Australian Centre for Robotic Vision","ror":"https://ror.org/02zv9xv82"},{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2560207749.pdf","grobid_xml":"https://content.openalex.org/works/W2560207749.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2808503835","https://openalex.org/W2983537698","https://openalex.org/W2890718122","https://openalex.org/W3174441232","https://openalex.org/W2997051332","https://openalex.org/W3122381352","https://openalex.org/W3035160838","https://openalex.org/W2973208549","https://openalex.org/W3039060838","https://openalex.org/W3119358226","https://openalex.org/W3159124846","https://openalex.org/W3043320985","https://openalex.org/W2526544345","https://openalex.org/W3045147787","https://openalex.org/W2997886689","https://openalex.org/W2946880267","https://openalex.org/W2578466053","https://openalex.org/W3002962084","https://openalex.org/W2617628589","https://openalex.org/W2971355948"],"abstract_inverted_index":{"Existing":[0],"image":[1,53],"captioning":[2,47,103],"models":[3,24],"do":[4],"not":[5],"generalize":[6],"well":[7],"to":[8,49,66,82,86],"out-of-domain":[9,102],"images":[10,31],"containing":[11],"novel":[12],"scenes":[13],"or":[14],"objects.":[15],"This":[16],"limitation":[17],"severely":[18],"hinders":[19],"the":[20,33,68,75,98,121,126,137],"use":[21],"of":[22,52,70,97,139],"these":[23],"in":[25,32,74],"real":[26],"world":[27],"applications":[28],"dealing":[29],"with":[30],"wild.":[34],"We":[35,129],"address":[36],"this":[37,92],"problem":[38],"using":[39],"a":[40],"flexible":[41],"approach":[42,93],"that":[43,119,132],"enables":[44],"existing":[45],"deep":[46],"architectures":[48],"take":[50],"advantage":[51],"taggers":[54],"at":[55],"test":[56],"time,":[57],"without":[58],"re-training.":[59],"Our":[60],"method":[61],"uses":[62],"constrained":[63],"beam":[64],"search":[65],"force":[67],"inclusion":[69],"selected":[71],"tag":[72,89,123],"words":[73],"output,":[76],"and":[77],"fixed,":[78],"pretrained":[79],"word":[80],"embeddings":[81],"facilitate":[83],"vocabulary":[84],"expansion":[85],"previously":[87],"unseen":[88],"words.":[90],"Using":[91],"we":[94,133],"achieve":[95],"state":[96],"art":[99],"results":[100,108,115],"for":[101,109],"on":[104],"MSCOCO":[105],"(and":[106],"improved":[107],"in-domain":[110],"captioning).":[111],"Perhaps":[112],"surprisingly,":[113],"our":[114],"significantly":[116,135],"outperform":[117],"approaches":[118],"incorporate":[120],"same":[122],"predictions":[124],"into":[125],"learning":[127],"algorithm.":[128],"also":[130],"show":[131],"can":[134],"improve":[136],"quality":[138],"generated":[140],"ImageNet":[141],"captions":[142],"by":[143],"leveraging":[144],"ground-truth":[145],"labels.":[146]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
