{"id":"https://openalex.org/W4390075233","doi":"https://doi.org/10.1109/tnnls.2023.3323491","title":"Visual Commonsense-Aware Representation Network for Video Captioning","display_name":"Visual Commonsense-Aware Representation Network for Video Captioning","publication_year":2023,"publication_date":"2023-12-21","ids":{"openalex":"https://openalex.org/W4390075233","doi":"https://doi.org/10.1109/tnnls.2023.3323491","pmid":"https://pubmed.ncbi.nlm.nih.gov/38127607"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2023.3323491","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3323491","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087623065","display_name":"Pengpeng Zeng","orcid":"https://orcid.org/0000-0002-0672-3790"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Pengpeng Zeng","raw_affiliation_strings":["Shenzhen Institute for Advanced Study, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute for Advanced Study, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084439574","display_name":"Haonan Zhang","orcid":"https://orcid.org/0000-0003-1015-7338"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haonan Zhang","raw_affiliation_strings":["Future Media Center and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Future Media Center and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066645546","display_name":"Lianli Gao","orcid":"https://orcid.org/0000-0002-2522-6394"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lianli Gao","raw_affiliation_strings":["Shenzhen Institute for Advanced Study, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute for Advanced Study, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101598763","display_name":"Xiangpeng Li","orcid":"https://orcid.org/0000-0001-5350-5780"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangpeng Li","raw_affiliation_strings":["Future Media Center and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Future Media Center and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002696909","display_name":"Jin Qian","orcid":"https://orcid.org/0000-0002-9368-387X"},"institutions":[{"id":"https://openalex.org/I4800084","display_name":"Southwest Jiaotong University","ror":"https://ror.org/00hn7w693","country_code":"CN","type":"education","lineage":["https://openalex.org/I4800084"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin Qian","raw_affiliation_strings":["School of Computing and Artificial Intelligence, Southwest Jiaotong University, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Computing and Artificial Intelligence, Southwest Jiaotong University, Chengdu, China","institution_ids":["https://openalex.org/I4800084"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052993469","display_name":"Heng Tao Shen","orcid":"https://orcid.org/0000-0002-2999-2088"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Heng Tao Shen","raw_affiliation_strings":["Future Media Center and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Future Media Center and the School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5087623065"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":3.3431,"has_fulltext":false,"cited_by_count":28,"citation_normalized_percentile":{"value":0.94077574,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"36","issue":"1","first_page":"1092","last_page":"1103"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9719934463500977},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8599522113800049},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.5377902984619141},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5372154116630554},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5184482932090759},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4970383942127228},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4930855929851532},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.41819122433662415},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.416734516620636},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.41460880637168884},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3598523736000061},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.22699809074401855}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9719934463500977},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8599522113800049},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.5377902984619141},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5372154116630554},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5184482932090759},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4970383942127228},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4930855929851532},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.41819122433662415},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.416734516620636},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.41460880637168884},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3598523736000061},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.22699809074401855},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2023.3323491","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3323491","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:38127607","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38127607","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2546663186","display_name":null,"funder_award_id":"62020106008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3998246166","display_name":null,"funder_award_id":"2022YFC2009903","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G6298522600","display_name":null,"funder_award_id":"2022119","funder_id":"https://openalex.org/F4320333335","funder_display_name":"Sichuan Province Science and Technology Support Program"},{"id":"https://openalex.org/G6864684409","display_name":null,"funder_award_id":"62122018","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6971015501","display_name":null,"funder_award_id":"171106","funder_id":"https://openalex.org/F4320334945","funder_display_name":"Fok Ying Tong Education Foundation"},{"id":"https://openalex.org/G7314976618","display_name":null,"funder_award_id":"61872064","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7666647912","display_name":null,"funder_award_id":"2022YFC2009900","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8027008232","display_name":null,"funder_award_id":"61772116","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320333335","display_name":"Sichuan Province Science and Technology Support Program","ror":null},{"id":"https://openalex.org/F4320334945","display_name":"Fok Ying Tong Education Foundation","ror":"https://ror.org/01mv9t934"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":75,"referenced_works":["https://openalex.org/W102708294","https://openalex.org/W1586939924","https://openalex.org/W1927052826","https://openalex.org/W1956340063","https://openalex.org/W2081580037","https://openalex.org/W2133459682","https://openalex.org/W2139501017","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2425121537","https://openalex.org/W2549139847","https://openalex.org/W2554906389","https://openalex.org/W2561529111","https://openalex.org/W2603266952","https://openalex.org/W2765716052","https://openalex.org/W2766520430","https://openalex.org/W2886641317","https://openalex.org/W2904291752","https://openalex.org/W2905145027","https://openalex.org/W2907492528","https://openalex.org/W2914629512","https://openalex.org/W2947312908","https://openalex.org/W2948358897","https://openalex.org/W2954199749","https://openalex.org/W2962681491","https://openalex.org/W2962949233","https://openalex.org/W2962958773","https://openalex.org/W2962990649","https://openalex.org/W2963477107","https://openalex.org/W2963843052","https://openalex.org/W2964241990","https://openalex.org/W2981582341","https://openalex.org/W2984862483","https://openalex.org/W2988753485","https://openalex.org/W2989322838","https://openalex.org/W2996984511","https://openalex.org/W2997344006","https://openalex.org/W3010356384","https://openalex.org/W3021007069","https://openalex.org/W3034221024","https://openalex.org/W3034730770","https://openalex.org/W3035365026","https://openalex.org/W3035372819","https://openalex.org/W3035392611","https://openalex.org/W3039060838","https://openalex.org/W3087338569","https://openalex.org/W3167939936","https://openalex.org/W3173367591","https://openalex.org/W3174001836","https://openalex.org/W3176013197","https://openalex.org/W3176425931","https://openalex.org/W3176689360","https://openalex.org/W3178827611","https://openalex.org/W3192261211","https://openalex.org/W3193290865","https://openalex.org/W3205398323","https://openalex.org/W3206019042","https://openalex.org/W3207015681","https://openalex.org/W3209229003","https://openalex.org/W4205399593","https://openalex.org/W4214692497","https://openalex.org/W4226109438","https://openalex.org/W4283722434","https://openalex.org/W4283797848","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6682631176","https://openalex.org/W6684090549","https://openalex.org/W6686509673","https://openalex.org/W6754140808","https://openalex.org/W6799655445","https://openalex.org/W6801793312","https://openalex.org/W6803537622","https://openalex.org/W6812308581","https://openalex.org/W6898505805"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2949522393"],"abstract_inverted_index":{"Generating":[0],"consecutive":[1],"descriptions":[2],"for":[3,93],"videos,":[4],"that":[5,58,176],"is,":[6],"video":[7,22,49,62,94,109,132,168,199],"captioning,":[8],"requires":[9],"taking":[10],"full":[11],"advantage":[12],"of":[13,29,69,185,198,213],"visual":[14,55,88,127,140],"representation":[15,90],"along":[16],"with":[17],"the":[18,43,53,112,183,195,210],"generation":[19,154],"process.":[20],"Existing":[21],"captioning":[23,169],"methods":[24,40],"focus":[25],"on":[26,165],"an":[27],"exploration":[28],"spatial-temporal":[30],"representations":[31],"and":[32,173,203],"their":[33,67],"relationships":[34],"to":[35,72,145,159],"produce":[36],"inferences.":[37],"However,":[38],"such":[39],"only":[41],"exploit":[42],"superficial":[44],"association":[45],"contained":[46],"in":[47,60,130,137],"a":[48,61,82,99,102,126,131,147,152],"itself":[50],"without":[51,119],"considering":[52],"intrinsic":[54],"commonsense":[56,128],"knowledge":[57,70],"exists":[59],"dataset,":[63],"which":[64,134,207],"may":[65],"hinder":[66],"capabilities":[68],"cognitive":[71],"reason":[73],"accurate":[74],"descriptions.":[75],"To":[76],"address":[77],"this":[78,205],"problem,":[79],"we":[80,97],"propose":[81],"simple,":[83],"yet":[84],"effective":[85],"method,":[86],"called":[87],"commonsense-aware":[89],"network":[91],"(VCRN),":[92],"captioning.":[95],"Specifically,":[96],"construct":[98],"Video":[100],"Dictionary,":[101],"plug-and-play":[103],"component,":[104],"obtained":[105],"by":[106],"clustering":[107],"all":[108],"features":[110],"from":[111],"total":[113],"dataset":[114],"into":[115,194],"multiple":[116],"clustered":[117],"centers":[118],"additional":[120],"annotation.":[121],"Each":[122],"center":[123],"implicitly":[124],"represents":[125],"concept":[129,141,149],"domain,":[133],"is":[135,157,192],"utilized":[136],"our":[138,177,186,190,214],"proposed":[139,158],"selection":[142],"(VCS)":[143],"component":[144,156],"obtain":[146],"video-related":[148],"feature.":[150],"Next,":[151],"concept-integrated":[153],"(CIG)":[155],"enhance":[160],"caption":[161],"generation.":[162],"Extensive":[163],"experiments":[164],"three":[166],"public":[167],"benchmarks:":[170],"MSVD,":[171],"MSR-VTT,":[172],"VATEX,":[174],"demonstrate":[175],"method":[178,191,197],"achieves":[179],"state-of-the-art":[180],"performance,":[181,206],"indicating":[182],"effectiveness":[184],"method.":[187,215],"In":[188],"addition,":[189],"integrated":[193],"existing":[196],"question":[200],"answering":[201],"(VideoQA)":[202],"improves":[204],"further":[208],"demonstrates":[209],"generalization":[211],"capability":[212],"The":[216],"source":[217],"code":[218],"has":[219],"been":[220],"released":[221],"at":[222],"https://github.com/zchoi/VCRN.":[223]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":4}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
