{"id":"https://openalex.org/W4396878071","doi":"https://doi.org/10.1109/tcsvt.2024.3399933","title":"EvCap: Element-Aware Video Captioning","display_name":"EvCap: Element-Aware Video Captioning","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4396878071","doi":"https://doi.org/10.1109/tcsvt.2024.3399933"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3399933","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3399933","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100319993","display_name":"Sheng Liu","orcid":"https://orcid.org/0000-0001-9608-0524"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Sheng Liu","raw_affiliation_strings":["State Key Laboratory of Virual Reality Technology and Systems, School of Computer Science and Engineering, Beihang University, Beijing, China","Shanghai Xunmeng Information Technology Inc, Shanghai, China","Beihang University, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Virual Reality Technology and Systems, School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Shanghai Xunmeng Information Technology Inc, Shanghai, China","institution_ids":[]},{"raw_affiliation_string":"Beihang University, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012121355","display_name":"Annan Li","orcid":"https://orcid.org/0000-0003-3497-5052"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Annan Li","raw_affiliation_strings":["State Key Laboratory of Virual Reality Technology and Systems, School of Computer Science and Engineering, Beihang University, Beijing, China","School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Virual Reality Technology and Systems, School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100458090","display_name":"Yuwei Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuwei Zhao","raw_affiliation_strings":["State Key Laboratory of Virual Reality Technology and Systems, School of Computer Science and Engineering, Beihang University, Beijing, China","School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Virual Reality Technology and Systems, School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108891681","display_name":"Jiahao Wang","orcid":"https://orcid.org/0009-0000-5496-9720"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiahao Wang","raw_affiliation_strings":["Huawei Technologies Inc., Shenzhen, China","Huawei Technologies Inc, ShenZhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Technologies Inc., Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]},{"raw_affiliation_string":"Huawei Technologies Inc, ShenZhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100398953","display_name":"Yunhong Wang","orcid":"https://orcid.org/0000-0001-8001-2703"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunhong Wang","raw_affiliation_strings":["State Key Laboratory of Virual Reality Technology and Systems, School of Computer Science and Engineering, Beihang University, Beijing, China","School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Virual Reality Technology and Systems, School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100319993"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":2.7072,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.91211622,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"34","issue":"10","first_page":"9718","last_page":"9731"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8722955584526062},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7849608659744263},{"id":"https://openalex.org/keywords/element","display_name":"Element (criminal law)","score":0.47518980503082275},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.4437185823917389},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.40103086829185486},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.39084383845329285},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38133835792541504},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.24831250309944153}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8722955584526062},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7849608659744263},{"id":"https://openalex.org/C200288055","wikidata":"https://www.wikidata.org/wiki/Q2621792","display_name":"Element (criminal law)","level":2,"score":0.47518980503082275},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.4437185823917389},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.40103086829185486},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.39084383845329285},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38133835792541504},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.24831250309944153},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3399933","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3399933","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7599999904632568}],"awards":[{"id":"https://openalex.org/G7665144123","display_name":null,"funder_award_id":"U20B2069","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":100,"referenced_works":["https://openalex.org/W1498436455","https://openalex.org/W1522734439","https://openalex.org/W1601567445","https://openalex.org/W1946342668","https://openalex.org/W1956340063","https://openalex.org/W1988419820","https://openalex.org/W1995820507","https://openalex.org/W2035434106","https://openalex.org/W2046589395","https://openalex.org/W2064675550","https://openalex.org/W2101105183","https://openalex.org/W2117539524","https://openalex.org/W2125707784","https://openalex.org/W2152984213","https://openalex.org/W2164948578","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2277195237","https://openalex.org/W2295739661","https://openalex.org/W2425121537","https://openalex.org/W2523937091","https://openalex.org/W2523993696","https://openalex.org/W2604662567","https://openalex.org/W2618530766","https://openalex.org/W2621571501","https://openalex.org/W2765658575","https://openalex.org/W2798405286","https://openalex.org/W2805627121","https://openalex.org/W2896457183","https://openalex.org/W2948358897","https://openalex.org/W2951390634","https://openalex.org/W2962681491","https://openalex.org/W2963037989","https://openalex.org/W2963150697","https://openalex.org/W2963155035","https://openalex.org/W2963524571","https://openalex.org/W2964241990","https://openalex.org/W2969844053","https://openalex.org/W2984862483","https://openalex.org/W2989322838","https://openalex.org/W2990503944","https://openalex.org/W2990964949","https://openalex.org/W3009192917","https://openalex.org/W3019301826","https://openalex.org/W3022778813","https://openalex.org/W3034221024","https://openalex.org/W3035284526","https://openalex.org/W3035365026","https://openalex.org/W3035392611","https://openalex.org/W3039060838","https://openalex.org/W3093309253","https://openalex.org/W3096935578","https://openalex.org/W3104862079","https://openalex.org/W3105232955","https://openalex.org/W3115684750","https://openalex.org/W3138516171","https://openalex.org/W3163842339","https://openalex.org/W3163971663","https://openalex.org/W3166366124","https://openalex.org/W3176362845","https://openalex.org/W3176425931","https://openalex.org/W3176689360","https://openalex.org/W3205021045","https://openalex.org/W3214192224","https://openalex.org/W3216659302","https://openalex.org/W3217340782","https://openalex.org/W4205510032","https://openalex.org/W4214612132","https://openalex.org/W4220790454","https://openalex.org/W4220863475","https://openalex.org/W4226396383","https://openalex.org/W4229494842","https://openalex.org/W4246193833","https://openalex.org/W4285118488","https://openalex.org/W4304098906","https://openalex.org/W4309724203","https://openalex.org/W4309938607","https://openalex.org/W4312321660","https://openalex.org/W4312463400","https://openalex.org/W4312560592","https://openalex.org/W4322747194","https://openalex.org/W4382464395","https://openalex.org/W4385245566","https://openalex.org/W4386057717","https://openalex.org/W6605963037","https://openalex.org/W6606029161","https://openalex.org/W6620707391","https://openalex.org/W6631190155","https://openalex.org/W6639432524","https://openalex.org/W6678262379","https://openalex.org/W6682132143","https://openalex.org/W6682631176","https://openalex.org/W6683167905","https://openalex.org/W6684090549","https://openalex.org/W6755977528","https://openalex.org/W6784184991","https://openalex.org/W6791353385","https://openalex.org/W6795711426","https://openalex.org/W6849783008","https://openalex.org/W6955071965"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2949522393"],"abstract_inverted_index":{"Video":[0],"captioning":[1],"is":[2],"a":[3,37,42,94,180],"multi-modal":[4],"task":[5],"across":[6],"computer":[7],"vision":[8],"and":[9,21,46,92,130,144,152,172,187,219],"natural":[10,55],"language":[11],"processing.":[12],"Previous":[13],"methods":[14,24,52,91],"generally":[15],"follow":[16],"two":[17,124],"paradigms,":[18],"i.e.":[19,128],"template-based":[20,90],"sequence-based.":[22],"Template-based":[23],"can":[25],"generate":[26,53],"relatively":[27],"accurate":[28],"elements":[29,115,161],"(e.g.":[30],"humans,":[31],"objects,":[32],"or":[33],"actions)":[34],"to":[35,65,109,141,167,190,226],"complete":[36],"template":[38],"caption,":[39],"but":[40,59],"with":[41,184,209,224],"rather":[43],"limited":[44],"vocabulary":[45],"syntactic":[47],"structure;":[48],"in":[49,89,162],"contrast,":[50],"sequence-based":[51,118,139],"more":[54],"descriptions":[56],"like":[57],"humans":[58],"easily":[60],"suffer":[61],"element":[62,86],"errors":[63],"due":[64],"their":[66],"heavy":[67],"dependence":[68],"on":[69],"visual":[70,107],"features":[71,104,108],"that":[72,101,155,201],"often":[73],"contain":[74],"much":[75],"distracting":[76],"information.":[77],"In":[78,120],"this":[79],"work,":[80],"we":[81,122,178],"draw":[82],"lessons":[83],"from":[84,133],"the":[85,117,134,138,148,157,163,169,192],"extraction":[87],"manner":[88],"propose":[93],"novel":[95],"Element-aware":[96],"video":[97],"Captioning":[98],"(EvCap)":[99],"framework":[100],"applies":[102],"linguistic":[103,126],"beyond":[105],"general":[106],"consolidate":[110],"model":[111],"awareness":[112],"of":[113,137,150,159,175,195],"specific":[114],"under":[116,211],"paradigm.":[119],"particular,":[121],"introduce":[123],"new":[125],"features,":[127,132,177],"action":[129,143],"object-relevant":[131],"upstream":[135,213],"encoder":[136,214,227],"paradigm":[140],"encode":[142],"object":[145],"information":[146],"(in":[147],"forms":[149],"phrases":[151],"words":[153],"respectively)":[154],"benefits":[156],"generation":[158],"corresponding":[160],"final":[164],"description.":[165],"Moreover,":[166],"fuse":[168],"heterogeneous":[170],"representations":[171],"relieve":[173],"noise":[174],"inaccurate":[176],"design":[179],"post-operation":[181],"fusion":[182],"strategy,":[183],"semantic":[185],"interaction":[186],"energy":[188],"weighting":[189],"ensure":[191],"effective":[193],"usage":[194],"each":[196],"feature.":[197],"Experimental":[198],"results":[199],"show":[200],"our":[202],"EvCap":[203],"achieves":[204],"amazingly":[205],"promising":[206],"performance":[207],"compared":[208],"baselines":[210],"diverse":[212],"architectures":[215],"including":[216],"CNNs,":[217],"ViT":[218],"CLIP,":[220],"demonstrating":[221],"good":[222],"scalability":[223],"respect":[225],"choices.":[228]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-07T14:57:38.498316","created_date":"2025-10-10T00:00:00"}
