{"id":"https://openalex.org/W4377235499","doi":"https://doi.org/10.1109/tcsvt.2023.3277827","title":"Concept Parser With Multimodal Graph Learning for Video Captioning","display_name":"Concept Parser With Multimodal Graph Learning for Video Captioning","publication_year":2023,"publication_date":"2023-05-22","ids":{"openalex":"https://openalex.org/W4377235499","doi":"https://doi.org/10.1109/tcsvt.2023.3277827"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2023.3277827","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3277827","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047021636","display_name":"Bofeng Wu","orcid":"https://orcid.org/0000-0002-4539-4854"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bofeng Wu","raw_affiliation_strings":["Key Laboratory of Complex Systems Modeling and Simulation, School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Complex Systems Modeling and Simulation, School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089927825","display_name":"Buyu Liu","orcid":"https://orcid.org/0009-0004-5534-7463"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Buyu Liu","raw_affiliation_strings":["NEC Laboratories America, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"NEC Laboratories America, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Peng Huang","orcid":"https://orcid.org/0000-0002-5727-2790"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Huang","raw_affiliation_strings":["Key Laboratory of Complex Systems Modeling and Simulation, School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Complex Systems Modeling and Simulation, School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109304655","display_name":"Jun Bao","orcid":"https://orcid.org/0000-0003-4460-0772"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Bao","raw_affiliation_strings":["Hangzhou Global Scientific and Technological Innovation Center, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Hangzhou Global Scientific and Technological Innovation Center, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022800038","display_name":"Xi Peng","orcid":"https://orcid.org/0000-0002-5727-2790"},"institutions":[{"id":"https://openalex.org/I24185976","display_name":"Sichuan University","ror":"https://ror.org/011ashp19","country_code":"CN","type":"education","lineage":["https://openalex.org/I24185976"]},{"id":"https://openalex.org/I4210125143","display_name":"Chengdu University","ror":"https://ror.org/034z67559","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210125143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Xi","raw_affiliation_strings":["College of Computer Science, Sichuan University, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Sichuan University, Chengdu, China","institution_ids":["https://openalex.org/I4210125143","https://openalex.org/I24185976"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050817770","display_name":"Jun Yu","orcid":"https://orcid.org/0000-0003-1922-7283"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yu","raw_affiliation_strings":["Key Laboratory of Complex Systems Modeling and Simulation, School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Complex Systems Modeling and Simulation, School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5047021636"],"corresponding_institution_ids":["https://openalex.org/I50760025"],"apc_list":null,"apc_paid":null,"fwci":2.7481,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.92116047,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"33","issue":"9","first_page":"4484","last_page":"4495"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.854171872138977},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6683456301689148},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.577335000038147},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.5699045658111572},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5599798560142517},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.5192797183990479},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4766044020652771},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4742804169654846},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33479204773902893},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.18271714448928833},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.10359171032905579},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.08074623346328735}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.854171872138977},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6683456301689148},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.577335000038147},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.5699045658111572},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5599798560142517},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.5192797183990479},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4766044020652771},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4742804169654846},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33479204773902893},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.18271714448928833},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.10359171032905579},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.08074623346328735},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2023.3277827","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3277827","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6151690139","display_name":null,"funder_award_id":"62020106007","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6570941292","display_name":null,"funder_award_id":"62125201","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W1956340063","https://openalex.org/W2064675550","https://openalex.org/W2101105183","https://openalex.org/W2108325777","https://openalex.org/W2123301721","https://openalex.org/W2143017621","https://openalex.org/W2164290393","https://openalex.org/W2194775991","https://openalex.org/W2277195237","https://openalex.org/W2425121537","https://openalex.org/W2895845501","https://openalex.org/W2896457183","https://openalex.org/W2905145027","https://openalex.org/W2908510526","https://openalex.org/W2948358897","https://openalex.org/W2951390634","https://openalex.org/W2953106684","https://openalex.org/W2962934715","https://openalex.org/W2963037989","https://openalex.org/W2963150697","https://openalex.org/W2963524571","https://openalex.org/W2963541336","https://openalex.org/W2963971014","https://openalex.org/W2964350391","https://openalex.org/W2964532449","https://openalex.org/W2979826702","https://openalex.org/W2984862483","https://openalex.org/W2988753485","https://openalex.org/W2989322838","https://openalex.org/W2990964949","https://openalex.org/W2996984511","https://openalex.org/W3022778813","https://openalex.org/W3034221024","https://openalex.org/W3035365026","https://openalex.org/W3035392611","https://openalex.org/W3081168214","https://openalex.org/W3122640483","https://openalex.org/W3126721948","https://openalex.org/W3134875898","https://openalex.org/W3138516171","https://openalex.org/W3158375352","https://openalex.org/W3168640669","https://openalex.org/W3176425931","https://openalex.org/W3176689360","https://openalex.org/W3204588463","https://openalex.org/W3207886649","https://openalex.org/W3217340782","https://openalex.org/W4205510032","https://openalex.org/W4214612132","https://openalex.org/W4226109438","https://openalex.org/W4295312788","https://openalex.org/W4304091587","https://openalex.org/W4311252325","https://openalex.org/W4312560592","https://openalex.org/W4313131769","https://openalex.org/W4318718936","https://openalex.org/W4320458302","https://openalex.org/W4385245566","https://openalex.org/W4385574358","https://openalex.org/W4394659899","https://openalex.org/W6620707391","https://openalex.org/W6678262379","https://openalex.org/W6684090549","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6766978945","https://openalex.org/W6784184991","https://openalex.org/W6790307280","https://openalex.org/W6804001748","https://openalex.org/W6847435255","https://openalex.org/W6849177959","https://openalex.org/W6850204008","https://openalex.org/W6864544085"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4310447809","https://openalex.org/W4200243030","https://openalex.org/W2800782462","https://openalex.org/W4323777661","https://openalex.org/W3209117276","https://openalex.org/W4388184981"],"abstract_inverted_index":{"Conventional":[0],"video":[1,87],"captioning":[2],"methods":[3],"are":[4,83,131],"either":[5],"stage-wise":[6],"or":[7],"simple":[8],"end-to-end.":[9],"While":[10],"the":[11,25,86,103,132,147,194,203],"former":[12],"might":[13],"introduce":[14],"additional":[15],"noise":[16],"when":[17],"exploiting":[18],"off-the-shelf":[19],"models":[20],"to":[21,40,101,150,152,202],"provide":[22],"extra":[23],"information,":[24],"latter":[26],"suffers":[27],"from":[28,85],"lacking":[29],"high-level":[30,65,80],"cues.":[31,178],"Therefore,":[32],"a":[33,51,95,118,157,164,210],"more":[34],"desired":[35],"framework":[36],"should":[37],"be":[38],"able":[39],"capture":[41],"multi-aspects":[42],"of":[43,92,106,171],"videos":[44],"consistently.":[45],"To":[46,114],"this":[47],"end,":[48],"we":[49,141],"present":[50],"concept-aware":[52],"and":[53,64,68,79,89,128,134,173,190,199,207],"task-specific":[54],"model":[55],"named":[56],"CAT":[57,187],"that":[58,167,186],"accounts":[59],"for":[60],"both":[61],"low-level":[62,77],"visual":[63,78,127],"concept":[66,81,90,96,107,129],"cues,":[67],"incorporates":[69],"them":[70],"effectively":[71],"in":[72,124,146,193],"an":[73],"end-to-end":[74],"manner.":[75],"Specifically,":[76],"features":[82,130],"obtained":[84],"transformer":[88,120,149],"parser":[91,108],"CAT.":[93],"And":[94],"loss":[97,166],"is":[98,121,136,161],"further":[99],"introduced":[100,123],"regularize":[102],"learning":[104,170],"process":[105],"w.r.t.":[109],"generated":[110],"pseudo":[111],"ground":[112],"truth.":[113],"combine":[115],"multi-level":[116,177],"features,":[117],"caption":[119,135,148],"later":[122],"CAT,":[125],"where":[126],"inputs":[133],"its":[137],"output.":[138],"In":[139],"particular,":[140],"make":[142],"critical":[143],"design":[144],"choices":[145],"learn":[151],"exploit":[153],"these":[154],"cues":[155],"with":[156],"multi-modal":[158],"graph.":[159],"This":[160],"achieved":[162],"by":[163],"graph":[165],"enforces":[168],"effective":[169],"intra":[172],"inter":[174],"correlations":[175],"between":[176],"Extensive":[179],"experiments":[180],"on":[181,197,213],"three":[182],"benchmark":[183],"datasets":[184],"demonstrate":[185],"achieves":[188,209],"2.3":[189],"0.7":[191],"improvements":[192],"CIDEr":[195],"metric":[196],"MSVD":[198],"MSR-VTT":[200],"compared":[201],"state-of-the-art":[204],"method":[205],"SwinBERT":[206],"also":[208],"competitive":[211],"result":[212],"VATEX.":[214]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":4}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
