{"id":"https://openalex.org/W3213610433","doi":"https://doi.org/10.1109/icccnt51525.2021.9579967","title":"Image Caption Generator Using Attention Mechanism","display_name":"Image Caption Generator Using Attention Mechanism","publication_year":2021,"publication_date":"2021-07-06","ids":{"openalex":"https://openalex.org/W3213610433","doi":"https://doi.org/10.1109/icccnt51525.2021.9579967","mag":"3213610433"},"language":"en","primary_location":{"id":"doi:10.1109/icccnt51525.2021.9579967","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icccnt51525.2021.9579967","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 12th International Conference on Computing Communication and Networking Technologies (ICCCNT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016174756","display_name":"Vaishnavi Agrawal","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Vaishnavi Agrawal","raw_affiliation_strings":["Electronics and Telecommunication Engineering College of Engineering, Pune, Pune, India"],"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunication Engineering College of Engineering, Pune, Pune, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029241614","display_name":"Shariva Dhekane","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shariva Dhekane","raw_affiliation_strings":["Electronics and Telecommunication Engineering College of Engineering, Pune, Pune, India"],"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunication Engineering College of Engineering, Pune, Pune, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087091295","display_name":"Neha Tuniya","orcid":"https://orcid.org/0000-0002-1957-8617"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Neha Tuniya","raw_affiliation_strings":["Electronics and Telecommunication Engineering College of Engineering, Pune, Pune, India"],"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunication Engineering College of Engineering, Pune, Pune, India","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084770257","display_name":"Vibha Vyas","orcid":"https://orcid.org/0000-0001-9845-7321"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vibha Vyas","raw_affiliation_strings":["Electronics and Telecommunication Engineering College of Engineering, Pune, Pune, India"],"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunication Engineering College of Engineering, Pune, Pune, India","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5016174756"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.441,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.84320261,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8491351008415222},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8476771116256714},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6935120820999146},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6228731870651245},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.612824559211731},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.6126917600631714},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5755269527435303},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5200690627098083},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.49779844284057617},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.48599228262901306},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.4493359923362732},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.4279819428920746},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.4216427803039551},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4136938750743866},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40478453040122986},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3832945227622986}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8491351008415222},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8476771116256714},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6935120820999146},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6228731870651245},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.612824559211731},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.6126917600631714},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5755269527435303},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5200690627098083},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49779844284057617},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.48599228262901306},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.4493359923362732},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.4279819428920746},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.4216427803039551},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4136938750743866},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40478453040122986},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3832945227622986},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icccnt51525.2021.9579967","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icccnt51525.2021.9579967","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 12th International Conference on Computing Communication and Networking Technologies (ICCCNT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7699999809265137,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2463955103","https://openalex.org/W2755889034","https://openalex.org/W2904596204","https://openalex.org/W2910121883","https://openalex.org/W2944851425","https://openalex.org/W2957634299","https://openalex.org/W3006628257","https://openalex.org/W3023584964","https://openalex.org/W3033562610","https://openalex.org/W3038756037","https://openalex.org/W3091255494","https://openalex.org/W3100321043","https://openalex.org/W4237896256","https://openalex.org/W6765950523"],"related_works":["https://openalex.org/W4312417841","https://openalex.org/W4321369474","https://openalex.org/W2731899572","https://openalex.org/W3133861977","https://openalex.org/W4200173597","https://openalex.org/W3116150086","https://openalex.org/W3151908889","https://openalex.org/W2999805992","https://openalex.org/W4291897433","https://openalex.org/W2547835662"],"abstract_inverted_index":{"Image":[0],"captioning":[1],"is":[2,104,207,212],"used":[3,213],"to":[4,28,83,96,111,161,176,191,214,225],"generate":[5,45,71,101,162,192,197,229],"sentences":[6,78],"describing":[7],"the":[8,12,20,30,34,37,46,53,64,74,163,180,199,216,221],"scene":[9],"captured":[10],"in":[11,19,73,87],"form":[13,75],"of":[14,33,66,76,94,115,179],"images.":[15],"It":[16,165],"identifies":[17,39],"objects":[18],"image,":[21,54],"performs":[22],"a":[23,105,109,148,159,167,184,193],"few":[24],"operations,":[25],"and":[26,49,60,100,121,140,158,182,228],"tries":[27],"find":[29],"salient":[31],"features":[32,178],"image.":[35],"Once":[36],"system":[38,149],"this":[40],"information,":[41],"it":[42],"should":[43,56],"further":[44],"most":[47],"relevant":[48,194],"brief":[50,127],"description":[51],"for":[52,108],"which":[55],"be":[57,81],"both":[58],"syntactically":[59],"semantically":[61],"correct.":[62],"With":[63],"advancements":[65],"Learning":[67,142],"techniques,":[68],"algorithms":[69],"can":[70],"text":[72,103],"natural":[77,92],"that":[79,150,206],"will":[80],"able":[82],"describe":[84],"an":[85,152,156,203],"image":[86,98,116,181],"its":[88],"best":[89],"form.":[90],"The":[91,113,123,218],"ability":[93,224],"humans":[95],"understand":[97,226],"content":[99],"descriptive":[102],"challenging":[106],"task":[107,124],"machine":[110],"imitate.":[112],"applications":[114],"caption":[117],"generation":[118],"are":[119],"extensive":[120],"significant.":[122],"involves":[125],"generating":[126],"captions":[128],"using":[129],"various":[130],"techniques":[131],"like":[132],"Natural":[133],"language":[134],"processing":[135],"(NLP),":[136],"Computer":[137],"vision":[138],"(CV),":[139],"Deep":[141],"(DL)":[143],"techniques.":[144],"This":[145],"paper":[146],"introduces":[147],"uses":[151,166,202],"attention":[153,204],"mechanism":[154,205],"alongside":[155],"encoder":[157],"decoder":[160],"captions.":[164],"pre-trained":[168],"Convolutional":[169],"Neural":[170,186],"Network":[171,187],"(CNN)":[172],"viz.":[173,189],"Inception":[174],"V3":[175],"extract":[177],"then":[183],"Recurrent":[185],"(RNN)":[188],"GRU":[190],"caption.":[195],"To":[196],"captions,":[198],"proposed":[200],"model":[201],"Bahdanau":[208],"attention.":[209],"MS-COCO":[210],"dataset":[211],"train":[215],"model.":[217],"results":[219],"validate":[220],"model's":[222],"reasonable":[223],"images":[227],"text.":[230]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
