{"id":"https://openalex.org/W7154748957","doi":"https://doi.org/10.5753/jbcs.2026.5857","title":"Brazilian Portuguese Image Captioning with Transformers: A Study on Cross-Native-Translated Dataset","display_name":"Brazilian Portuguese Image Captioning with Transformers: A Study on Cross-Native-Translated Dataset","publication_year":2026,"publication_date":"2026-04-15","ids":{"openalex":"https://openalex.org/W7154748957","doi":"https://doi.org/10.5753/jbcs.2026.5857"},"language":null,"primary_location":{"id":"doi:10.5753/jbcs.2026.5857","is_oa":true,"landing_page_url":"https://doi.org/10.5753/jbcs.2026.5857","pdf_url":null,"source":{"id":"https://openalex.org/S69801987","display_name":"Journal of the Brazilian Computer Society","issn_l":"0104-6500","issn":["0104-6500","1678-4804"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the Brazilian Computer Society","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.5753/jbcs.2026.5857","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114319953","display_name":"Gabriel Bromonschenkel","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gabriel Bromonschenkel","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0009-0000-6816-7913","affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105938488","display_name":"Alessando L. Koerich","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alessandro L. Koerich","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0000-0001-5879-7014","affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020928137","display_name":"Thiago M. Paix\u00e3o","orcid":"https://orcid.org/0000-0003-1554-6834"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thiago M. Paix\u00e3o","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0000-0003-1554-6834","affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5029524369","display_name":"Hil\u00e1rio Oliveira","orcid":"https://orcid.org/0000-0003-0643-7206"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hil\u00e1rio Tomaz Alves De Oliveira","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0000-0003-0643-7206","affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5114319953"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.67053852,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"32","issue":"1","first_page":"663","last_page":"676"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.0017000000225380063,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13310","display_name":"Subtitles and Audiovisual Media","score":0.001500000013038516,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.7851999998092651},{"id":"https://openalex.org/keywords/portuguese","display_name":"Portuguese","score":0.6639999747276306},{"id":"https://openalex.org/keywords/brazilian-portuguese","display_name":"Brazilian Portuguese","score":0.5558000206947327},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5004000067710876},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4925999939441681},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.4740000069141388},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4700999855995178},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.45590001344680786}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8134999871253967},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.7851999998092651},{"id":"https://openalex.org/C35219183","wikidata":"https://www.wikidata.org/wiki/Q5146","display_name":"Portuguese","level":2,"score":0.6639999747276306},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6395000219345093},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6068999767303467},{"id":"https://openalex.org/C2778880076","wikidata":"https://www.wikidata.org/wiki/Q750553","display_name":"Brazilian Portuguese","level":3,"score":0.5558000206947327},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5004000067710876},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4925999939441681},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4740000069141388},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4700999855995178},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.45590001344680786},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4406999945640564},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.41990000009536743},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3716000020503998},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.35530000925064087},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.3386000096797943},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.299699991941452},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2705000042915344},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.25920000672340393}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5753/jbcs.2026.5857","is_oa":true,"landing_page_url":"https://doi.org/10.5753/jbcs.2026.5857","pdf_url":null,"source":{"id":"https://openalex.org/S69801987","display_name":"Journal of the Brazilian Computer Society","issn_l":"0104-6500","issn":["0104-6500","1678-4804"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the Brazilian Computer Society","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.5753/jbcs.2026.5857","is_oa":true,"landing_page_url":"https://doi.org/10.5753/jbcs.2026.5857","pdf_url":null,"source":{"id":"https://openalex.org/S69801987","display_name":"Journal of the Brazilian Computer Society","issn_l":"0104-6500","issn":["0104-6500","1678-4804"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the Brazilian Computer Society","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/5","score":0.49837806820869446,"display_name":"Gender equality"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1895577753","https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2564590796","https://openalex.org/W3046314183","https://openalex.org/W3136908527","https://openalex.org/W3138516171","https://openalex.org/W3153469116","https://openalex.org/W3216130706","https://openalex.org/W4285141201","https://openalex.org/W4307280064","https://openalex.org/W4366123788","https://openalex.org/W4382459038","https://openalex.org/W4386065564","https://openalex.org/W4386162736","https://openalex.org/W4389524249","https://openalex.org/W4389524413","https://openalex.org/W4392188261","https://openalex.org/W4400527793","https://openalex.org/W4402474492","https://openalex.org/W4403535099","https://openalex.org/W7131794162"],"related_works":[],"abstract_inverted_index":{"Image":[0],"captioning":[1],"(IC)":[2],"refers":[3],"to":[4,22,47,64,107,116,135,154],"the":[5,48,133,137,151,156,196],"automatic":[6],"generation":[7,21],"of":[8,50,78,92,95],"natural":[9],"language":[10,82],"descriptions":[11],"for":[12,84,145],"images,":[13],"with":[14,25,110],"applications":[15],"ranging":[16],"from":[17,114],"social":[18],"media":[19],"content":[20],"assisting":[23],"individuals":[24],"visual":[26],"impairments.":[27],"While":[28],"most":[29],"research":[30],"has":[31],"been":[32],"focused":[33],"on":[34,127,132],"English-based":[35],"models,":[36,167],"low-resource":[37],"languages":[38],"such":[39],"as":[40],"Brazilian":[41,85,101,175],"Portuguese":[42,86,102,176],"face":[43],"significant":[44],"challenges":[45],"due":[46],"lack":[49],"specialized":[51],"datasets":[52,58],"and":[53,81,104,149,214],"models.":[54],"Several":[55],"studies":[56],"create":[57],"by":[59,73,99],"automatically":[60,112],"translating":[61],"existing":[62],"ones":[63],"mitigate":[65],"resource":[66],"scarcity.":[67],"This":[68],"work":[69],"addresses":[70],"this":[71],"gap":[72],"proposing":[74],"a":[75,90,108,121,174],"cross-native-translated":[76],"evaluation":[77,190],"Transformer-based":[79],"vision":[80],"models":[83,125,182,194],"IC.":[87],"We":[88],"use":[89,150],"version":[91,109],"Flickr30K":[93],"comprised":[94],"captions":[96,111],"manually":[97],"created":[98],"native":[100],"speakers":[103],"compare":[105],"it":[106],"translated":[113],"English":[115],"Portuguese.":[117],"The":[118],"experiments":[119],"include":[120],"cross-context":[122],"approach,":[123],"where":[124],"trained":[126],"one":[128],"dataset":[129],"are":[130],"tested":[131],"other":[134,166],"assess":[136],"translation":[138],"impact.":[139],"Additionally,":[140],"we":[141],"incorporate":[142],"attention":[143],"maps":[144],"model":[146],"inference":[147],"interpretation":[148],"CLIP-Score":[152],"metric":[153],"evaluate":[155],"image-description":[157],"alignment.":[158,202],"Our":[159],"findings":[160],"show":[161],"that":[162],"Swin-DistilBERTimbau":[163],"consistently":[164],"outperforms":[165],"demonstrating":[168],"strong":[169],"generalization":[170],"across":[171],"datasets.":[172],"ViTucano,":[173],"pre-trained":[177],"VLM,":[178],"surpasses":[179],"larger":[180],"multilingual":[181],"(GPT-4o,":[183],"LLaMa":[184],"3.2":[185],"Vision)":[186],"in":[187],"traditional":[188],"text-based":[189],"metrics,":[191],"while":[192],"GPT-4":[193],"achieve":[195],"highest":[197],"CLIP-Score,":[198],"highlighting":[199],"improved":[200],"image-text":[201],"Attention":[203],"analysis":[204],"reveals":[205],"systematic":[206],"biases,":[207],"including":[208],"gender":[209],"misclassification,":[210],"object":[211],"enumeration":[212],"errors,":[213],"spatial":[215],"inconsistencies.":[216]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-04-18T00:00:00"}
