{"id":"https://openalex.org/W4404317764","doi":"https://doi.org/10.1109/aiotc63215.2024.10748270","title":"Multi-Branch Information Network for Advanced Image Captioning","display_name":"Multi-Branch Information Network for Advanced Image Captioning","publication_year":2024,"publication_date":"2024-09-13","ids":{"openalex":"https://openalex.org/W4404317764","doi":"https://doi.org/10.1109/aiotc63215.2024.10748270"},"language":"en","primary_location":{"id":"doi:10.1109/aiotc63215.2024.10748270","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aiotc63215.2024.10748270","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 3rd International Conference on Artificial Intelligence, Internet of Things and Cloud Computing Technology (AIoTC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048960862","display_name":"Caixia Song","orcid":"https://orcid.org/0009-0003-3455-2508"},"institutions":[{"id":"https://openalex.org/I136765683","display_name":"Tianjin University of Technology","ror":"https://ror.org/00zbe0w13","country_code":"CN","type":"education","lineage":["https://openalex.org/I136765683"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Caixia Song","raw_affiliation_strings":["School of Computer Science and Engineering, Tianjin University of Technology,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Tianjin University of Technology,Tianjin,China","institution_ids":["https://openalex.org/I136765683"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5048960862"],"corresponding_institution_ids":["https://openalex.org/I136765683"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20738527,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"130","last_page":"133"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9732999801635742,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.881458044052124},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7540404796600342},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.6173413991928101},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43954578042030334},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.39706093072891235}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.881458044052124},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7540404796600342},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.6173413991928101},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43954578042030334},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.39706093072891235}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/aiotc63215.2024.10748270","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aiotc63215.2024.10748270","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 3rd International Conference on Artificial Intelligence, Internet of Things and Cloud Computing Technology (AIoTC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1895577753","https://openalex.org/W2302086703","https://openalex.org/W2745461083","https://openalex.org/W2890531016","https://openalex.org/W2962735233","https://openalex.org/W2986670728","https://openalex.org/W3034655362","https://openalex.org/W3091588028","https://openalex.org/W3167939936","https://openalex.org/W3174377922","https://openalex.org/W4221143761","https://openalex.org/W4312924260","https://openalex.org/W4387951982"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Understanding":[0],"images":[1,40],"from":[2],"different":[3,62],"perspectives":[4],"and":[5,24,92,125,143,149,169],"performing":[6],"advanced":[7,93],"semantic":[8,59,90,94],"consistency":[9,95,145],"transformation":[10,96,146],"across":[11],"modalities":[12,151],"are":[13,22],"key":[14],"elements":[15],"in":[16,35,61,139,177],"generating":[17],"natural":[18],"language":[19],"descriptions":[20,45,116],"that":[21,86,135],"relevant":[23],"coherent":[25],"with":[26],"the":[27,67,140,153,162,166,171,178],"current":[28,51],"image":[29,100,119],"captions.":[30],"Insufficient":[31],"understanding":[32,91],"or":[33,46],"errors":[34],"interpreting":[36],"visual":[37,89,137,148],"features":[38],"of":[39,70,175],"can":[41],"lead":[42],"to":[43,57,112,122],"inconsistent":[44],"generate":[47],"hallucination":[48],"problems.":[49],"Moreover,":[50],"techniques":[52],"often":[53],"employ":[54],"attention":[55],"mechanisms":[56],"achieve":[58],"alignment":[60],"representation":[63],"spaces":[64],"while":[65],"overlooking":[66],"underlying":[68],"heterogeneity":[69],"multimodal":[71],"data.":[72],"In":[73],"this":[74],"paper,":[75],"we":[76,105,129],"propose":[77],"a":[78,98,108,131],"Multi-Branch":[79],"Information":[80],"Network":[81],"for":[82,117,124],"Advanced":[83],"Image":[84],"Captioning":[85],"integrates":[87],"comprehensive":[88],"into":[97],"single":[99],"captioning":[101],"model":[102,111],"architecture.":[103],"Technically,":[104],"first":[106],"utilize":[107],"cross-modal":[109],"retrieval":[110],"obtain":[113],"fine-grained":[114],"contextual":[115],"each":[118],"region,":[120],"aiming":[121],"compensate":[123],"alleviate":[126],"hallucination.":[127],"Next,":[128],"introduce":[130],"novel":[132],"mapping":[133],"network":[134],"reconstructs":[136],"information":[138],"textual":[141,150,154],"space":[142],"achieves":[144,170],"between":[147],"within":[152],"domain.":[155],"Empirical":[156],"evidences":[157],"show":[158],"that,":[159],"ours":[160],"outperforms":[161],"latest":[163],"approaches":[164],"on":[165],"COCO":[167],"dataset":[168],"best":[172],"CIDEr":[173],"score":[174],"138.4%":[176],"Karpathy":[179],"test":[180],"set.":[181]},"counts_by_year":[],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
