{"id":"https://openalex.org/W4390873420","doi":"https://doi.org/10.1109/tim.2024.3353830","title":"A Dual-Feature-Based Adaptive Shared Transformer Network for Image Captioning","display_name":"A Dual-Feature-Based Adaptive Shared Transformer Network for Image Captioning","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4390873420","doi":"https://doi.org/10.1109/tim.2024.3353830"},"language":"en","primary_location":{"id":"doi:10.1109/tim.2024.3353830","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tim.2024.3353830","pdf_url":null,"source":{"id":"https://openalex.org/S10892749","display_name":"IEEE Transactions on Instrumentation and Measurement","issn_l":"0018-9456","issn":["0018-9456","1557-9662"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Instrumentation and Measurement","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060876756","display_name":"Yinbin Shi","orcid":"https://orcid.org/0000-0003-4601-1015"},"institutions":[{"id":"https://openalex.org/I75390827","display_name":"Beijing University of Chemical Technology","ror":"https://ror.org/00df5yc52","country_code":"CN","type":"education","lineage":["https://openalex.org/I75390827"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yinbin Shi","raw_affiliation_strings":["College of Information Science and Technology, Beijing University of Chemical Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Technology, Beijing University of Chemical Technology, Beijing, China","institution_ids":["https://openalex.org/I75390827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078330551","display_name":"Ji Xia","orcid":"https://orcid.org/0009-0000-1418-8065"},"institutions":[{"id":"https://openalex.org/I75390827","display_name":"Beijing University of Chemical Technology","ror":"https://ror.org/00df5yc52","country_code":"CN","type":"education","lineage":["https://openalex.org/I75390827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ji Xia","raw_affiliation_strings":["College of Information Science and Technology, Beijing University of Chemical Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Technology, Beijing University of Chemical Technology, Beijing, China","institution_ids":["https://openalex.org/I75390827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081318069","display_name":"MengChu Zhou","orcid":"https://orcid.org/0000-0002-5408-8752"},"institutions":[{"id":"https://openalex.org/I118118575","display_name":"New Jersey Institute of Technology","ror":"https://ror.org/05e74xb87","country_code":"US","type":"education","lineage":["https://openalex.org/I118118575"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"MengChu Zhou","raw_affiliation_strings":["Helen and John C. Hartmann Department of Electrical and Computer Engineering, New Jersey Institute of Technology, Newark, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Helen and John C. Hartmann Department of Electrical and Computer Engineering, New Jersey Institute of Technology, Newark, NJ, USA","institution_ids":["https://openalex.org/I118118575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079155612","display_name":"Zhengcai Cao","orcid":"https://orcid.org/0000-0003-0344-0207"},"institutions":[{"id":"https://openalex.org/I75390827","display_name":"Beijing University of Chemical Technology","ror":"https://ror.org/00df5yc52","country_code":"CN","type":"education","lineage":["https://openalex.org/I75390827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhengcai Cao","raw_affiliation_strings":["College of Information Science and Technology, Beijing University of Chemical Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Technology, Beijing University of Chemical Technology, Beijing, China","institution_ids":["https://openalex.org/I75390827"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5060876756"],"corresponding_institution_ids":["https://openalex.org/I75390827"],"apc_list":null,"apc_paid":null,"fwci":6.8229,"has_fulltext":false,"cited_by_count":26,"citation_normalized_percentile":{"value":0.97710941,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"73","issue":null,"first_page":"1","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8214462995529175},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5826267004013062},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5665167570114136},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5063446760177612},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40583133697509766},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3800014853477478},{"id":"https://openalex.org/keywords/electronic-engineering","display_name":"Electronic engineering","score":0.35255566239356995},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33314573764801025},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3260501027107239},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.24002733826637268},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.18700259923934937},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.16826900839805603}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8214462995529175},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5826267004013062},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5665167570114136},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5063446760177612},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40583133697509766},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3800014853477478},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.35255566239356995},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33314573764801025},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3260501027107239},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.24002733826637268},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.18700259923934937},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.16826900839805603},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tim.2024.3353830","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tim.2024.3353830","pdf_url":null,"source":{"id":"https://openalex.org/S10892749","display_name":"IEEE Transactions on Instrumentation and Measurement","issn_l":"0018-9456","issn":["0018-9456","1557-9662"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Instrumentation and Measurement","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[{"id":"https://openalex.org/G1309666564","display_name":null,"funder_award_id":"92148202","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G36332496","display_name":null,"funder_award_id":"52175002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2133512280","https://openalex.org/W2194775991","https://openalex.org/W2506483933","https://openalex.org/W2745461083","https://openalex.org/W2915977493","https://openalex.org/W2962861647","https://openalex.org/W2963084599","https://openalex.org/W2963101956","https://openalex.org/W2971402774","https://openalex.org/W2979739834","https://openalex.org/W2981165461","https://openalex.org/W2986670728","https://openalex.org/W2999127310","https://openalex.org/W3034655362","https://openalex.org/W3035160838","https://openalex.org/W3035284526","https://openalex.org/W3035497460","https://openalex.org/W3096609285","https://openalex.org/W3125509312","https://openalex.org/W3136792391","https://openalex.org/W3167939936","https://openalex.org/W3173407600","https://openalex.org/W3174377922","https://openalex.org/W3196122027","https://openalex.org/W3205765769","https://openalex.org/W3214016039","https://openalex.org/W3215626407","https://openalex.org/W4200462690","https://openalex.org/W4205581758","https://openalex.org/W4214917601","https://openalex.org/W4225627404","https://openalex.org/W4225645749","https://openalex.org/W4226109438","https://openalex.org/W4281665577","https://openalex.org/W4285186657","https://openalex.org/W4285197287","https://openalex.org/W4285740942","https://openalex.org/W4292263294","https://openalex.org/W4294982713","https://openalex.org/W4312365601","https://openalex.org/W4320458302","https://openalex.org/W4378697418","https://openalex.org/W4385245566","https://openalex.org/W4386072307","https://openalex.org/W6620707391","https://openalex.org/W6628877408","https://openalex.org/W6679436768","https://openalex.org/W6682631176","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6803872405","https://openalex.org/W6850204008"],"related_works":["https://openalex.org/W4310447809","https://openalex.org/W4200243030","https://openalex.org/W2800782462","https://openalex.org/W3209117276","https://openalex.org/W4388184981","https://openalex.org/W4323777661","https://openalex.org/W3164229987","https://openalex.org/W3215212336","https://openalex.org/W4290852288","https://openalex.org/W3217388757"],"abstract_inverted_index":{"Current":[0],"models":[1,217],"exhibit":[2],"notable":[3],"efficacy":[4],"in":[5,188],"image":[6],"captioning":[7],"tasks.":[8],"Mainstream":[9],"research":[10],"shows":[11],"that":[12,181,206],"combining":[13],"dual":[14,28,87],"visual":[15,18,29,138],"features":[16,30,44,93,97,144,168],"enhances":[17],"representations":[19],"and":[20,33,59,77,103,117,192,214],"brings":[21],"performance":[22,211],"boost.":[23],"However,":[24],"the":[25,51,73,83,119,130,159,182],"incorporation":[26],"of":[27,42,54,75,132,153,161,175,190],"complicates":[31],"computation":[32],"expands":[34],"parameters,":[35],"hindering":[36],"streamlined":[37],"model":[38,193,208],"deployment.":[39],"The":[40],"selection":[41],"region":[43,96],"requires":[45],"a":[46,66,146,151],"pre-trained":[47],"object":[48],"detector,":[49],"neglecting":[50],"model\u2019s":[52,120],"ease":[53],"use":[55],"for":[56],"new":[57],"scenarios":[58],"data.":[60],"In":[61],"this":[62],"paper,":[63],"we":[64,90,157],"propose":[65,107],"dual-feature":[67],"adaptive":[68],"shared":[69],"Transformer":[70,111,147],"network,":[71],"capitalizing":[72],"merits":[74],"grid":[76,143,167],"shallow":[78],"patch":[79],"features,":[80],"while":[81,195],"circumventing":[82],"extra":[84],"complexity":[85],"from":[86],"channels.":[88],"Specifically,":[89],"eschew":[91],"complex":[92],"such":[94],"as":[95],"to":[98,114,127,150],"facilitate":[99],"straightforward":[100],"dataset":[101],"compilation":[102],"expedite":[104],"inference.":[105],"We":[106],"an":[108],"Adaptive":[109],"Shared":[110],"block":[112],"(AST)":[113],"conserve":[115],"parameters":[116],"diminish":[118],"FLOPs.":[121],"A":[122],"gating":[123],"mechanism":[124],"is":[125],"employed":[126],"adaptively":[128],"compute":[129],"importance":[131],"each":[133],"feature,":[134],"thereby":[135],"obtaining":[136],"stronger":[137],"features.":[139],"Since":[140],"using":[141],"flattening":[142],"before":[145],"often":[148],"leads":[149],"loss":[152],"crucial":[154],"spatial":[155],"information,":[156],"incorporate":[158],"learning":[160],"relative":[162],"geometric":[163],"information":[164],"based":[165],"on":[166,202,212,218],"into":[169],"our":[170,207],"proposed":[171],"method.":[172],"Our":[173],"analysis":[174],"various":[176],"feature":[177],"fusion":[178],"techniques":[179],"reveals":[180],"AST":[183],"approach":[184],"outperforms":[185,215],"its":[186],"counterparts":[187],"terms":[189],"FLOPs":[191],"size":[194],"still":[196],"achieving":[197],"high":[198],"performance.":[199],"Extensive":[200],"experiments":[201],"different":[203],"datasets":[204],"indicate":[205],"demonstrates":[209],"competitive":[210],"MSCOCO":[213],"state-of-the-art":[216],"small-scale":[219],"datasets.":[220]},"counts_by_year":[{"year":2025,"cited_by_count":22},{"year":2024,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
