{"id":"https://openalex.org/W4393906056","doi":"https://doi.org/10.1109/tmm.2024.3384678","title":"Embedded Heterogeneous Attention Transformer for Cross-Lingual Image Captioning","display_name":"Embedded Heterogeneous Attention Transformer for Cross-Lingual Image Captioning","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4393906056","doi":"https://doi.org/10.1109/tmm.2024.3384678"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3384678","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3384678","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025890671","display_name":"Zijie Song","orcid":"https://orcid.org/0000-0002-1262-764X"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zijie Song","raw_affiliation_strings":["School of Computer Science and Information Engineering, Hefei University of Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Information Engineering, Hefei University of Technology, Hefei, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040235604","display_name":"Zhenzhen Hu","orcid":"https://orcid.org/0000-0003-1042-8361"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenzhen Hu","raw_affiliation_strings":["School of Computer Science and Information Engineering, Hefei University of Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Information Engineering, Hefei University of Technology, Hefei, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025436570","display_name":"Yuanen Zhou","orcid":"https://orcid.org/0000-0002-4986-3611"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuanen Zhou","raw_affiliation_strings":["Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Hefei, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008632512","display_name":"Ye Zhao","orcid":"https://orcid.org/0000-0002-8180-4697"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Zhao","raw_affiliation_strings":["School of Computer Science and Information Engineering, Hefei University of Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Information Engineering, Hefei University of Technology, Hefei, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051332325","display_name":"Richang Hong","orcid":"https://orcid.org/0000-0001-5461-3986"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Richang Hong","raw_affiliation_strings":["School of Computer Science and Information Engineering, Hefei University of Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Information Engineering, Hefei University of Technology, Hefei, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100377147","display_name":"Meng Wang","orcid":"https://orcid.org/0000-0002-3094-7735"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Wang","raw_affiliation_strings":["School of Computer Science and Information Engineering, Hefei University of Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Information Engineering, Hefei University of Technology, Hefei, China","institution_ids":["https://openalex.org/I16365422"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5025890671"],"corresponding_institution_ids":["https://openalex.org/I16365422"],"apc_list":null,"apc_paid":null,"fwci":4.4871,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.95589315,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"26","issue":null,"first_page":"9008","last_page":"9020"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8941214084625244},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.85953688621521},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6599183678627014},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5372630953788757},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.501619815826416},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.45871877670288086},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4104691743850708},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3331524729728699}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8941214084625244},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.85953688621521},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6599183678627014},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5372630953788757},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.501619815826416},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45871877670288086},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4104691743850708},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3331524729728699},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3384678","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3384678","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1829462488","display_name":null,"funder_award_id":"61932009","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6558903332","display_name":null,"funder_award_id":"62172138","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":96,"referenced_works":["https://openalex.org/W103340358","https://openalex.org/W1773149199","https://openalex.org/W1889081078","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1975563293","https://openalex.org/W2007972815","https://openalex.org/W2081553713","https://openalex.org/W2247931231","https://openalex.org/W2418300416","https://openalex.org/W2509490957","https://openalex.org/W2652651782","https://openalex.org/W2735810033","https://openalex.org/W2745461083","https://openalex.org/W2749708282","https://openalex.org/W2754927243","https://openalex.org/W2758123554","https://openalex.org/W2767774008","https://openalex.org/W2884134047","https://openalex.org/W2885013662","https://openalex.org/W2890531016","https://openalex.org/W2890718122","https://openalex.org/W2943885184","https://openalex.org/W2950212751","https://openalex.org/W2954199749","https://openalex.org/W2963048642","https://openalex.org/W2963084599","https://openalex.org/W2963101956","https://openalex.org/W2963527096","https://openalex.org/W2963707260","https://openalex.org/W2970398671","https://openalex.org/W2970611505","https://openalex.org/W2986670728","https://openalex.org/W2998166190","https://openalex.org/W3009270862","https://openalex.org/W3012648429","https://openalex.org/W3012871709","https://openalex.org/W3018388102","https://openalex.org/W3033200006","https://openalex.org/W3034144331","https://openalex.org/W3034414418","https://openalex.org/W3034655362","https://openalex.org/W3035160838","https://openalex.org/W3035284526","https://openalex.org/W3035323998","https://openalex.org/W3035454069","https://openalex.org/W3035702572","https://openalex.org/W3044175177","https://openalex.org/W3091588028","https://openalex.org/W3094673569","https://openalex.org/W3100183449","https://openalex.org/W3102566412","https://openalex.org/W3108202858","https://openalex.org/W3110014757","https://openalex.org/W3113425976","https://openalex.org/W3131318543","https://openalex.org/W3138516171","https://openalex.org/W3154362247","https://openalex.org/W3154430790","https://openalex.org/W3157860878","https://openalex.org/W3159583533","https://openalex.org/W3167939936","https://openalex.org/W3173220247","https://openalex.org/W3173961205","https://openalex.org/W3174010726","https://openalex.org/W3175971420","https://openalex.org/W3176124636","https://openalex.org/W3191818374","https://openalex.org/W3200356107","https://openalex.org/W3206022579","https://openalex.org/W3210150990","https://openalex.org/W3212664589","https://openalex.org/W3216130706","https://openalex.org/W4205199670","https://openalex.org/W4213174824","https://openalex.org/W4213453379","https://openalex.org/W4214642029","https://openalex.org/W4221143761","https://openalex.org/W4221147537","https://openalex.org/W4283793919","https://openalex.org/W4285065310","https://openalex.org/W4285602612","https://openalex.org/W4312289196","https://openalex.org/W4385245566","https://openalex.org/W6639432524","https://openalex.org/W6691269169","https://openalex.org/W6738964360","https://openalex.org/W6739682802","https://openalex.org/W6763643401","https://openalex.org/W6766904570","https://openalex.org/W6767362881","https://openalex.org/W6774950721","https://openalex.org/W6791353385","https://openalex.org/W6801213118","https://openalex.org/W6809536152","https://openalex.org/W6810334672"],"related_works":["https://openalex.org/W4310447809","https://openalex.org/W4200243030","https://openalex.org/W2800782462","https://openalex.org/W3209117276","https://openalex.org/W4388184981","https://openalex.org/W4323777661","https://openalex.org/W3164229987","https://openalex.org/W3215212336","https://openalex.org/W4290852288","https://openalex.org/W3217388757"],"abstract_inverted_index":{"Cross-lingual":[0],"image":[1,35,54,215,223],"captioning":[2],"is":[3,24],"a":[4,91,155],"challenging":[5],"task":[6,23],"that":[7,181],"requires":[8],"addressing":[9],"both":[10],"cross-lingual":[11,214],"and":[12,29,36,56,81,86,105,116,135,141,177,225],"cross-modal":[13,40],"obstacles":[14],"in":[15,21,146,161,175,185],"multimedia":[16],"analysis.":[17],"The":[18,109,189],"crucial":[19],"issue":[20],"this":[22],"to":[25,77,128,136,158,172,200],"model":[26,157],"the":[27,30,34,45,49,53,113,147,169,193,211,218],"global":[28],"local":[31,50,82],"matching":[32,51],"between":[33,52,84],"different":[37,87],"languages.":[38,64,163],"Existing":[39],"embedding":[41],"methods":[42],"based":[43],"on":[44,168],"transformer":[46],"architecture":[47],"oversee":[48],"region":[55],"monolingual":[57,203],"words,":[58],"especially":[59],"when":[60],"dealing":[61],"with":[62],"diverse":[63],"To":[65],"overcome":[66],"these":[67],"limitations,":[68],"we":[69],"propose":[70],"an":[71],"Embedded":[72],"Heterogeneous":[73,97,100,106],"Attention":[74,101],"Transformer":[75],"(EHAT)":[76],"establish":[78],"cross-domain":[79,119,144],"relationships":[80,120],"correspondences":[83],"images":[85],"languages":[88,134,180],"by":[89,121],"using":[90],"heterogeneous":[92,138,151],"network.":[93],"EHAT":[94,207],"comprises":[95],"Masked":[96],"Cross-attention":[98],"(MHCA),":[99],"Reasoning":[102],"Network":[103],"(HARN),":[104],"Co-attention":[107],"(HCA).":[108],"HARN":[110],"serves":[111],"as":[112],"core":[114],"network":[115],"it":[117],"captures":[118],"leveraging":[122],"visual":[123],"bounding":[124],"box":[125],"representation":[126],"features":[127,131],"connect":[129],"word":[130],"from":[132],"two":[133,162,179],"learn":[137],"maps.":[139],"MHCA":[140],"HCA":[142],"facilitate":[143],"integration":[145],"encoder":[148],"through":[149],"specialized":[150],"attention":[152],"mechanisms,":[153],"enabling":[154],"single":[156],"generate":[159,173],"captions":[160,174],"We":[164],"evaluate":[165],"our":[166,197],"approach":[167],"MSCOCO":[170],"dataset":[171],"English":[176],"Chinese,":[178],"exhibit":[182],"significant":[183],"differences":[184],"their":[186],"language":[187],"families.":[188],"experimental":[190],"results":[191],"demonstrate":[192],"superior":[194],"performance":[195],"of":[196,213],"method":[198],"compared":[199],"existing":[201],"advanced":[202],"methods.":[204],"Our":[205],"proposed":[206],"framework":[208],"effectively":[209],"addresses":[210],"challenges":[212],"captioning,":[216],"paving":[217],"way":[219],"for":[220],"improved":[221],"multilingual":[222],"analysis":[224],"understanding.":[226]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
