{"id":"https://openalex.org/W2920921207","doi":"https://doi.org/10.1117/12.2523174","title":"Sequence-to-sequence image caption generator","display_name":"Sequence-to-sequence image caption generator","publication_year":2019,"publication_date":"2019-03-15","ids":{"openalex":"https://openalex.org/W2920921207","doi":"https://doi.org/10.1117/12.2523174","mag":"2920921207"},"language":"en","primary_location":{"id":"doi:10.1117/12.2523174","is_oa":false,"landing_page_url":"https://doi.org/10.1117/12.2523174","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Eleventh International Conference on Machine Vision (ICMV 2018)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044669719","display_name":"Rehab Alahmadi","orcid":null},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]},{"id":"https://openalex.org/I28022161","display_name":"King Saud University","ror":"https://ror.org/02f81g417","country_code":"SA","type":"education","lineage":["https://openalex.org/I28022161"]}],"countries":["SA","US"],"is_corresponding":true,"raw_author_name":"Rehab Alahmadi","raw_affiliation_strings":["King Saud Univ. (Saudi Arabia)","The George Washington Univ. (United States)"],"affiliations":[{"raw_affiliation_string":"King Saud Univ. (Saudi Arabia)","institution_ids":["https://openalex.org/I28022161"]},{"raw_affiliation_string":"The George Washington Univ. (United States)","institution_ids":["https://openalex.org/I193531525"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005991642","display_name":"Chung Hyuk Park","orcid":"https://orcid.org/0000-0003-0742-6541"},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chung Hyuk Park","raw_affiliation_strings":["The George Washington Univ. (United States)"],"affiliations":[{"raw_affiliation_string":"The George Washington Univ. (United States)","institution_ids":["https://openalex.org/I193531525"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009074303","display_name":"James K. Hahn","orcid":"https://orcid.org/0000-0001-6535-8175"},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Hahn","raw_affiliation_strings":["The George Washington Univ. (United States)"],"affiliations":[{"raw_affiliation_string":"The George Washington Univ. (United States)","institution_ids":["https://openalex.org/I193531525"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5044669719"],"corresponding_institution_ids":["https://openalex.org/I193531525","https://openalex.org/I28022161"],"apc_list":null,"apc_paid":null,"fwci":0.5061,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.67855089,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"102","last_page":"102"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8593398332595825},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8444575071334839},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.7577238082885742},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7187173962593079},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.6961879134178162},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6748926639556885},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.6587331295013428},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.6382936239242554},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6352745890617371},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.529751181602478},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.4400824308395386},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.40983814001083374},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.39820167422294617},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3784795105457306},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.37271231412887573}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8593398332595825},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8444575071334839},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.7577238082885742},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7187173962593079},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.6961879134178162},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6748926639556885},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.6587331295013428},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.6382936239242554},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6352745890617371},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.529751181602478},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.4400824308395386},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.40983814001083374},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.39820167422294617},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3784795105457306},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.37271231412887573},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1117/12.2523174","is_oa":false,"landing_page_url":"https://doi.org/10.1117/12.2523174","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Eleventh International Conference on Machine Vision (ICMV 2018)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1593271688","https://openalex.org/W1811254738","https://openalex.org/W1899504021","https://openalex.org/W2133459682","https://openalex.org/W2154652894","https://openalex.org/W2157331557","https://openalex.org/W2277195237","https://openalex.org/W2549599535","https://openalex.org/W2964168617","https://openalex.org/W4236965008","https://openalex.org/W4301409532","https://openalex.org/W6630875275","https://openalex.org/W6631636882","https://openalex.org/W6638145986","https://openalex.org/W6638742206","https://openalex.org/W6639102338","https://openalex.org/W6639425484","https://openalex.org/W6639657675","https://openalex.org/W6639809013","https://openalex.org/W6640617836","https://openalex.org/W6641064462","https://openalex.org/W6681075545","https://openalex.org/W6682631176","https://openalex.org/W6683258052","https://openalex.org/W6691503852","https://openalex.org/W6698228248","https://openalex.org/W6728912905","https://openalex.org/W6734404982","https://openalex.org/W6898505805"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W3088136942","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2949522393","https://openalex.org/W4289422896"],"abstract_inverted_index":{"Recently,":[0],"image":[1,42,69],"captioning":[2],"has":[3],"received":[4],"much":[5],"attention":[6],"from":[7],"the":[8,15,19,54,73,80,83,92,95,106,110,113,120,123,128,135],"artificial-intelligent":[9],"(AI)":[10],"research":[11],"community.":[12],"Most":[13],"of":[14,32,88,112],"current":[16],"works":[17,34],"follow":[18],"encoder-decoder":[20,74],"machine":[21,75],"translation":[22,76],"model":[23,63,84,114,133],"to":[24,52,82],"automatically":[25],"generate":[26,53],"captions":[27],"for":[28],"images.":[29],"However,":[30],"most":[31],"these":[33],"used":[35],"Convolutional":[36],"Neural":[37,46],"Network":[38,47],"(CNN)":[39],"as":[40,49,67],"an":[41,68],"encoder":[43,70],"and":[44,118],"Recurrent":[45],"(RNN)":[48],"a":[50,61,86],"decoder":[51],"caption.":[55],"In":[56],"this":[57],"paper,":[58],"we":[59],"propose":[60],"sequence-to-sequence":[62],"that":[64,71,79,90,126],"uses":[65],"RNN":[66],"follows":[72],"model,":[77],"such":[78],"input":[81],"is":[85],"sequence":[87],"images":[89],"represents":[91],"objects":[93,98],"in":[94,105],"image.":[96],"These":[97],"are":[99],"ordered":[100],"based":[101],"on":[102,115,138],"their":[103],"order":[104],"captions.":[107],"We":[108],"demonstrate":[109],"results":[111,121],"Flickr30K":[116],"dataset":[117],"compare":[119],"with":[122],"state-ofthe-art":[124],"methods":[125,137],"use":[127],"same":[129],"dataset.":[130],"The":[131],"proposed":[132],"outperformed":[134],"state-of-the-art":[136],"all":[139],"metrics.":[140]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
