{"id":"https://openalex.org/W4408727265","doi":"https://doi.org/10.1109/ipas63548.2025.10924492","title":"Optimizing Multimodal Transformers for Medical Image Captioning: Enhancing Automated Descriptions via AI Systems","display_name":"Optimizing Multimodal Transformers for Medical Image Captioning: Enhancing Automated Descriptions via AI Systems","publication_year":2025,"publication_date":"2025-01-09","ids":{"openalex":"https://openalex.org/W4408727265","doi":"https://doi.org/10.1109/ipas63548.2025.10924492"},"language":"en","primary_location":{"id":"doi:10.1109/ipas63548.2025.10924492","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipas63548.2025.10924492","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 6th International Conference on Image Processing, Applications and Systems (IPAS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116608117","display_name":"Mithila Arman","orcid":null},"institutions":[{"id":"https://openalex.org/I5518804","display_name":"BRAC University","ror":"https://ror.org/00sge8677","country_code":"BD","type":"education","lineage":["https://openalex.org/I5518804"]}],"countries":["BD"],"is_corresponding":true,"raw_author_name":"Mithila Arman","raw_affiliation_strings":["BRAC University,Dept. of CSE,Dhaka,Bangladesh"],"affiliations":[{"raw_affiliation_string":"BRAC University,Dept. of CSE,Dhaka,Bangladesh","institution_ids":["https://openalex.org/I5518804"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017408589","display_name":"Md. Khurshid Jahan","orcid":"https://orcid.org/0000-0002-1289-2435"},"institutions":[{"id":"https://openalex.org/I157386601","display_name":"North South University","ror":"https://ror.org/05wdbfp45","country_code":"BD","type":"education","lineage":["https://openalex.org/I157386601"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Md. Khurshid Jahan","raw_affiliation_strings":["North South University,Dept. of ECE,Dhaka,Bangladesh"],"affiliations":[{"raw_affiliation_string":"North South University,Dept. of ECE,Dhaka,Bangladesh","institution_ids":["https://openalex.org/I157386601"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107505841","display_name":"Ahmed Faizul Haque Dhrubo","orcid":null},"institutions":[{"id":"https://openalex.org/I157386601","display_name":"North South University","ror":"https://ror.org/05wdbfp45","country_code":"BD","type":"education","lineage":["https://openalex.org/I157386601"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Ahmed Faizul Haque Dhrubo","raw_affiliation_strings":["North South University,Dept. of ECE,Dhaka,Bangladesh"],"affiliations":[{"raw_affiliation_string":"North South University,Dept. of ECE,Dhaka,Bangladesh","institution_ids":["https://openalex.org/I157386601"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067924436","display_name":"M. M. Rhaman","orcid":"https://orcid.org/0000-0002-1950-444X"},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Md. Mahfuzur Rhaman","raw_affiliation_strings":["George Mason University,Dept. of Computational Data Science,Fairfax,Virginia,USA"],"affiliations":[{"raw_affiliation_string":"George Mason University,Dept. of Computational Data Science,Fairfax,Virginia,USA","institution_ids":["https://openalex.org/I162714631"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049765549","display_name":"Sumaya Binte Zilani Choya","orcid":"https://orcid.org/0009-0007-6584-6890"},"institutions":[{"id":"https://openalex.org/I103434671","display_name":"American International University-Bangladesh","ror":"https://ror.org/02j8ga255","country_code":"BD","type":"education","lineage":["https://openalex.org/I103434671"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Sumaya Binte Zilani Choya","raw_affiliation_strings":["American International University-Bangladesh,Dept. of CSE,Dhaka,Bangladesh"],"affiliations":[{"raw_affiliation_string":"American International University-Bangladesh,Dept. of CSE,Dhaka,Bangladesh","institution_ids":["https://openalex.org/I103434671"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116745095","display_name":"Din Mohammad Dohan","orcid":null},"institutions":[{"id":"https://openalex.org/I5518804","display_name":"BRAC University","ror":"https://ror.org/00sge8677","country_code":"BD","type":"education","lineage":["https://openalex.org/I5518804"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Din Mohammad Dohan","raw_affiliation_strings":["BRAC University,Dept. of CSE,Dhaka,Bangladesh"],"affiliations":[{"raw_affiliation_string":"BRAC University,Dept. of CSE,Dhaka,Bangladesh","institution_ids":["https://openalex.org/I5518804"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104249917","display_name":"Md. Ashiq Ul Islam Sajid","orcid":null},"institutions":[{"id":"https://openalex.org/I5518804","display_name":"BRAC University","ror":"https://ror.org/00sge8677","country_code":"BD","type":"education","lineage":["https://openalex.org/I5518804"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Md. Ashiq Ul Islam Sajid","raw_affiliation_strings":["BRAC University,Dept. of CSE,Dhaka,Bangladesh"],"affiliations":[{"raw_affiliation_string":"BRAC University,Dept. of CSE,Dhaka,Bangladesh","institution_ids":["https://openalex.org/I5518804"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051981813","display_name":"Md. Golam Rabiul Alam","orcid":"https://orcid.org/0000-0002-9054-7557"},"institutions":[{"id":"https://openalex.org/I5518804","display_name":"BRAC University","ror":"https://ror.org/00sge8677","country_code":"BD","type":"education","lineage":["https://openalex.org/I5518804"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Md. Golam Rabiul Alam","raw_affiliation_strings":["BRAC University,Dept. of CSE,Dhaka,Bangladesh"],"affiliations":[{"raw_affiliation_string":"BRAC University,Dept. of CSE,Dhaka,Bangladesh","institution_ids":["https://openalex.org/I5518804"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5116608117"],"corresponding_institution_ids":["https://openalex.org/I5518804"],"apc_list":null,"apc_paid":null,"fwci":4.8058,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.94593772,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.945900022983551,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9239000082015991,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8784624338150024},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7068400382995605},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6074932813644409},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4747740924358368},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4458049535751343},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.41753655672073364},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.35487836599349976},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.15086892247200012},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.09990134835243225}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8784624338150024},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7068400382995605},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6074932813644409},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4747740924358368},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4458049535751343},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.41753655672073364},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.35487836599349976},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.15086892247200012},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.09990134835243225},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ipas63548.2025.10924492","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipas63548.2025.10924492","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 6th International Conference on Image Processing, Applications and Systems (IPAS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W2981851019","https://openalex.org/W3090449556","https://openalex.org/W3176687820","https://openalex.org/W4297672463","https://openalex.org/W4367674159","https://openalex.org/W4387643061","https://openalex.org/W6766673545","https://openalex.org/W6775188310","https://openalex.org/W6789753369","https://openalex.org/W6811013733","https://openalex.org/W6850204008","https://openalex.org/W6906327309"],"related_works":["https://openalex.org/W4310447809","https://openalex.org/W4200243030","https://openalex.org/W2800782462","https://openalex.org/W3209117276","https://openalex.org/W4388184981","https://openalex.org/W4323777661","https://openalex.org/W3164229987","https://openalex.org/W3215212336","https://openalex.org/W4290852288","https://openalex.org/W3217388757"],"abstract_inverted_index":{"In":[0,125],"contemporary":[1],"diagnostic":[2,160],"workflows,":[3],"medical":[4,26,38,157,166],"image":[5,39,167],"captioning":[6],"has":[7],"emerged":[8],"as":[9,56],"a":[10,63],"pivotal":[11],"advancement,":[12],"combining":[13],"deep":[14],"learning":[15],"methodologies":[16],"and":[17,23,47,59,77,101,117,122,162],"transformer":[18,90],"architectures":[19,72],"to":[20,68,74],"enhance":[21],"accuracy":[22],"efficiency":[24],"in":[25,151],"interpretations.":[27],"This":[28],"paper":[29],"proposes":[30],"the":[31,86,94,108,114,133,145,153],"optimization":[32],"of":[33,88,148],"multimodal":[34,65],"transformers":[35],"for":[36,81,135],"automated":[37,165],"captioning,":[40],"focusing":[41],"on":[42,156],"integrating":[43],"Vision":[44],"Transformers":[45,50,58],"(ViT)":[46],"Bidirectional":[48],"Auto-Regressive":[49],"(BART)":[51],"with":[52],"novel":[53],"variations":[54],"such":[55],"Swin":[57],"GPT-2.":[60],"We":[61,84],"use":[62],"robust":[64],"AI":[66],"framework":[67],"explore":[69],"how":[70],"these":[71],"synergize":[73],"generate":[75],"coherent":[76],"diagnostically":[78],"relevant":[79],"captions":[80],"radiological":[82],"images.":[83],"assess":[85],"performance":[87],"multiple":[89],"models":[91,150],"by":[92],"employing":[93],"ROCO":[95],"dataset,":[96],"containing":[97],"paired":[98],"X-ray":[99],"images":[100],"expert-generated":[102],"reports.":[103],"Our":[104],"findings":[105],"demonstrate":[106],"that":[107],"ViT":[109],"+":[110,128],"BART":[111],"combination":[112],"yields":[113],"most":[115],"stable":[116],"accurate":[118],"captions,":[119],"minimizing":[120],"training":[121],"validation":[123],"loss.":[124],"contrast,":[126],"DEiT":[127],"MBART":[129],"displayed":[130],"instability,":[131],"highlighting":[132],"need":[134],"further":[136],"hyperparameter":[137],"tuning.":[138],"Through":[139],"this":[140],"comparative":[141],"analysis,":[142],"we":[143],"underscore":[144],"critical":[146],"role":[147],"transformer-based":[149],"reducing":[152],"cognitive":[154],"load":[155],"professionals,":[158],"enhancing":[159],"accuracy,":[161],"promoting":[163],"real-time,":[164],"interpretation.":[168]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
