{"id":"https://openalex.org/W4226111889","doi":"https://doi.org/10.3233/jifs-211907","title":"Attention based sequence-to-sequence framework for auto image caption generation","display_name":"Attention based sequence-to-sequence framework for auto image caption generation","publication_year":2022,"publication_date":"2022-04-08","ids":{"openalex":"https://openalex.org/W4226111889","doi":"https://doi.org/10.3233/jifs-211907"},"language":"en","primary_location":{"id":"doi:10.3233/jifs-211907","is_oa":false,"landing_page_url":"https://doi.org/10.3233/jifs-211907","pdf_url":null,"source":{"id":"https://openalex.org/S179157397","display_name":"Journal of Intelligent & Fuzzy Systems","issn_l":"1064-1246","issn":["1064-1246","1875-8967"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Intelligent &amp; Fuzzy Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078898508","display_name":"Rashid Khan","orcid":"https://orcid.org/0000-0002-2410-044X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rashid Khan","raw_affiliation_strings":["National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109585221","display_name":"M. Shujah Islam","orcid":null},"institutions":[{"id":"https://openalex.org/I140221134","display_name":"Anhui Agricultural University","ror":"https://ror.org/0327f3359","country_code":"CN","type":"education","lineage":["https://openalex.org/I140221134"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"M. Shujah Islam","raw_affiliation_strings":["Anhui Agriculture University, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"Anhui Agriculture University, Hefei, Anhui, China","institution_ids":["https://openalex.org/I140221134"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070702816","display_name":"Khadija Kanwal","orcid":"https://orcid.org/0009-0001-2300-9636"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Khadija Kanwal","raw_affiliation_strings":["National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026575214","display_name":"Mansoor Iqbal","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mansoor Iqbal","raw_affiliation_strings":["National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100772358","display_name":"Md. Imran Hossain","orcid":"https://orcid.org/0000-0001-8030-8379"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Md. Imran Hossain","raw_affiliation_strings":["National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065761465","display_name":"Zhongfu Ye","orcid":"https://orcid.org/0000-0002-3207-2258"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhongfu Ye","raw_affiliation_strings":["National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, Anhui, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5065761465"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.4026,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.57815092,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"43","issue":"1","first_page":"159","last_page":"170"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.882696270942688},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.7838547229766846},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6542929410934448},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6464449167251587},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6118887066841125},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5266019105911255},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5098901987075806},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.5090696215629578},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5023202896118164},{"id":"https://openalex.org/keywords/bleu","display_name":"BLEU","score":0.496390163898468},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4932789206504822},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.47964173555374146},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4707401692867279},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4544655382633209},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4471052289009094},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4350188374519348},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4311734139919281},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.42896419763565063},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.32862138748168945},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3023669421672821},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2180178463459015},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.06872570514678955}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.882696270942688},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.7838547229766846},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6542929410934448},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6464449167251587},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6118887066841125},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5266019105911255},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5098901987075806},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.5090696215629578},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5023202896118164},{"id":"https://openalex.org/C622187","wikidata":"https://www.wikidata.org/wiki/Q3500773","display_name":"BLEU","level":3,"score":0.496390163898468},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4932789206504822},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.47964173555374146},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4707401692867279},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4544655382633209},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4471052289009094},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4350188374519348},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4311734139919281},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.42896419763565063},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.32862138748168945},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3023669421672821},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2180178463459015},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.06872570514678955},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/jifs-211907","is_oa":false,"landing_page_url":"https://doi.org/10.3233/jifs-211907","pdf_url":null,"source":{"id":"https://openalex.org/S179157397","display_name":"Journal of Intelligent & Fuzzy Systems","issn_l":"1064-1246","issn":["1064-1246","1875-8967"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Intelligent &amp; Fuzzy Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1969616664","https://openalex.org/W2102381086","https://openalex.org/W2766261529","https://openalex.org/W2789930422","https://openalex.org/W2803408761","https://openalex.org/W2898941977","https://openalex.org/W2907158289","https://openalex.org/W2907279673","https://openalex.org/W2908056713","https://openalex.org/W2963656855","https://openalex.org/W2979739834","https://openalex.org/W3036496243","https://openalex.org/W3037960791","https://openalex.org/W3084391106","https://openalex.org/W3115844225","https://openalex.org/W3138298063","https://openalex.org/W3195607060","https://openalex.org/W6600548291","https://openalex.org/W6748466699"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4283822356","https://openalex.org/W3031223029","https://openalex.org/W1950940422","https://openalex.org/W2129146436","https://openalex.org/W2032507829","https://openalex.org/W2147282173"],"abstract_inverted_index":{"Caption":[0],"generation":[1,35],"using":[2,37],"an":[3,38,53,71,98,108],"encoder-decoder":[4,48],"approach":[5,30,208],"has":[6,134],"recently":[7],"been":[8],"extensively":[9],"studied":[10],"and":[11,19,97,130,139,157,164,189,202,228],"implemented":[12],"in":[13,52,172,215],"various":[14],"domains,":[15],"including":[16],"image":[17,73,109],"captioning":[18],"code":[20],"captioning.":[21],"In":[22],"this":[23],"research":[24],"article,":[25],"we":[26],"propose":[27],"one":[28],"particular":[29],"for":[31,122],"completing":[32],"a":[33,46,58,67,78,92,112],"capture":[34],"task":[36],"\u201cattention\u201d-based":[39],"sequence-to-sequence":[40],"framework":[41],"that,":[42],"when":[43],"combined":[44],"with":[45,211],"conventional":[47],"model,":[49],"generates":[50,66],"captions":[51],"attention-based":[54,99],"manner.":[55],"ResNet-152":[56],"is":[57,209],"Convolutional":[59],"Neural":[60,94],"Network":[61,95],"(CNN)":[62],"based":[63],"encoder":[64],"that":[65,76,222],"comprehensive":[68],"representation":[69],"of":[70,107,114,217],"input":[72],"while":[74],"embedding":[75],"into":[77],"fixed":[79],"size":[80],"length":[81],"vector.":[82],"To":[83],"predict":[84],"the":[85,88,124,131,145,150,162,169,175],"next":[86],"sentence,":[87],"decoder":[89],"uses":[90],"LSTM,":[91],"Recurrent":[93],"(RNN),":[96],"mechanism":[100],"to":[101,116,126,174],"concentrate":[102],"attention":[103],"on":[104,161,204],"certain":[105],"sections":[106],"selectively.":[110],"Define":[111],"set":[113],"epochs":[115],"69,":[117],"which":[118],"should":[119],"be":[120],"enough":[121],"training":[123],"model":[125],"generate":[127],"informative":[128],"descriptions,":[129],"validation":[132],"loss":[133],"reached":[135],"its":[136],"minimum":[137],"limit":[138],"no":[140],"longer":[141],"decreases.":[142],"We":[143],"present":[144],"datasets":[146,167],"as":[147,149,153,155],"well":[148,154],"evaluation":[151,180,219],"metrics,":[152],"quantitative":[156],"qualitative":[158],"analysis.":[159],"Experiments":[160],"MSCOCO":[163],"Flickr8k":[165,193],"benchmark":[166],"illustrate":[168],"model\u2019s":[170],"efficacy":[171],"comparison":[173],"baseline":[176],"techniques.":[177],"On":[178],"MSCOCO,":[179],"scores":[181],"included":[182],"BLEU-1":[183,194],"0.81,":[184],"BLEU-2":[185,196],"0.61,":[186],"BLEU-3":[187,198],"0.47,":[188],"0.33":[190],"METEOR.":[191],"For":[192],"0.68,":[195],"0.49,":[197],"0.41,":[199],"METEOR":[200],"0.23,":[201],"0.86":[203],"SPICE.":[205],"The":[206],"proposed":[207],"comparable":[210],"several":[212],"state-of-the-art":[213],"methods":[214],"terms":[216],"standard":[218],"metric,":[220],"demonstrating":[221],"it":[223],"can":[224],"produce":[225],"more":[226],"accurate":[227],"richer":[229],"captions.":[230]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
