{"id":"https://openalex.org/W2890781596","doi":"https://doi.org/10.18653/v1/d18-1084","title":"Training for Diversity in Image Paragraph Captioning","display_name":"Training for Diversity in Image Paragraph Captioning","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2890781596","doi":"https://doi.org/10.18653/v1/d18-1084","mag":"2890781596"},"language":"en","primary_location":{"id":"doi:10.18653/v1/d18-1084","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-1084","pdf_url":"https://www.aclweb.org/anthology/D18-1084.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/D18-1084.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079016134","display_name":"Luke Melas-Kyriazi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Luke Melas-Kyriazi","raw_affiliation_strings":["School of Engineering and Applied Sciences Harvard University"],"affiliations":[{"raw_affiliation_string":"School of Engineering and Applied Sciences Harvard University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062546146","display_name":"Alexander M. Rush","orcid":"https://orcid.org/0000-0002-9900-1606"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alexander Rush","raw_affiliation_strings":["School of Engineering and Applied Sciences Harvard University"],"affiliations":[{"raw_affiliation_string":"School of Engineering and Applied Sciences Harvard University","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049731172","display_name":"George Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"George Han","raw_affiliation_strings":["School of Engineering and Applied Sciences Harvard University"],"affiliations":[{"raw_affiliation_string":"School of Engineering and Applied Sciences Harvard University","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5079016134"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.5516,"has_fulltext":true,"cited_by_count":64,"citation_normalized_percentile":{"value":0.94917266,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9929389953613281},{"id":"https://openalex.org/keywords/paragraph","display_name":"Paragraph","score":0.8566330075263977},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8094253540039062},{"id":"https://openalex.org/keywords/trigram","display_name":"Trigram","score":0.6208980083465576},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6005373001098633},{"id":"https://openalex.org/keywords/repetition","display_name":"Repetition (rhetorical device)","score":0.5872802734375},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5756956338882446},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.538105845451355},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5379512906074524},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4948405623435974},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.47683706879615784},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.17019179463386536},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06409066915512085}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9929389953613281},{"id":"https://openalex.org/C2777206241","wikidata":"https://www.wikidata.org/wiki/Q194431","display_name":"Paragraph","level":2,"score":0.8566330075263977},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8094253540039062},{"id":"https://openalex.org/C137546455","wikidata":"https://www.wikidata.org/wiki/Q3213474","display_name":"Trigram","level":2,"score":0.6208980083465576},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6005373001098633},{"id":"https://openalex.org/C2776141515","wikidata":"https://www.wikidata.org/wiki/Q1274479","display_name":"Repetition (rhetorical device)","level":2,"score":0.5872802734375},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5756956338882446},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.538105845451355},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5379512906074524},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4948405623435974},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.47683706879615784},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.17019179463386536},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06409066915512085},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/d18-1084","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-1084","pdf_url":"https://www.aclweb.org/anthology/D18-1084.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/d18-1084","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-1084","pdf_url":"https://www.aclweb.org/anthology/D18-1084.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2890781596.pdf","grobid_xml":"https://content.openalex.org/works/W2890781596.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W1514535095","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1956340063","https://openalex.org/W2176263492","https://openalex.org/W2277195237","https://openalex.org/W2525778437","https://openalex.org/W2549599535","https://openalex.org/W2560313346","https://openalex.org/W2564590796","https://openalex.org/W2605045867","https://openalex.org/W2612675303","https://openalex.org/W2737766105","https://openalex.org/W2949555952","https://openalex.org/W2951684117","https://openalex.org/W2963084599","https://openalex.org/W2963448089"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W4321098257","https://openalex.org/W2016501449"],"abstract_inverted_index":{"Image":[0],"paragraph":[1,91],"captioning":[2,21,92],"models":[3,14],"aim":[4],"to":[5,96],"produce":[6],"detailed":[7],"descriptions":[8],"of":[9,34],"a":[10,32],"source":[11],"image.":[12],"These":[13],"use":[15],"similar":[16],"techniques":[17],"as":[18,105],"standard":[19,57],"image":[20],"models,":[22],"but":[23,63],"they":[24],"have":[25,39],"encountered":[26],"issues":[27],"in":[28],"text":[29],"generation,":[30],"notably":[31],"lack":[33],"diversity":[35],"between":[36],"sentences,":[37],"that":[38,56],"limited":[40],"their":[41],"effectiveness.":[42],"In":[43],"this":[44,52],"work,":[45],"we":[46],"consider":[47],"applying":[48],"sequence-level":[49],"training":[50,59,80],"for":[51],"task.":[53],"We":[54],"find":[55],"self-critical":[58],"produces":[60,73],"poor":[61],"results,":[62],"when":[64],"combined":[65],"with":[66,99],"an":[67],"integrated":[68],"penalty":[69],"on":[70,83,87,101],"trigram":[71],"repetition":[72],"much":[74],"more":[75],"diverse":[76],"paragraphs.":[77],"This":[78],"simple":[79],"approach":[81],"improves":[82],"the":[84,88],"best":[85],"result":[86],"Visual":[89],"Genome":[90],"dataset":[93],"from":[94],"16.9":[95],"30.6":[97],"CIDEr,":[98],"gains":[100],"METEOR":[102],"and":[103],"BLEU":[104],"well,":[106],"without":[107],"requiring":[108],"any":[109],"architectural":[110],"changes.":[111]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":13},{"year":2019,"cited_by_count":8}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
