{"id":"https://openalex.org/W4283219253","doi":"https://doi.org/10.1142/s021800142255014x","title":"Tri-Modal Dense Video Captioning Based on Fine-Grained Aligned Text and Anchor-Free Event Proposals Generator","display_name":"Tri-Modal Dense Video Captioning Based on Fine-Grained Aligned Text and Anchor-Free Event Proposals Generator","publication_year":2022,"publication_date":"2022-06-21","ids":{"openalex":"https://openalex.org/W4283219253","doi":"https://doi.org/10.1142/s021800142255014x"},"language":"en","primary_location":{"id":"doi:10.1142/s021800142255014x","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s021800142255014x","pdf_url":null,"source":{"id":"https://openalex.org/S41486457","display_name":"International Journal of Pattern Recognition and Artificial Intelligence","issn_l":"0218-0014","issn":["0218-0014","1793-6381"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Pattern Recognition and Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005095177","display_name":"Jingjing Niu","orcid":"https://orcid.org/0000-0001-9515-1079"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingjing Niu","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, P. R. China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, P. R. China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051731173","display_name":"Yulai Xie","orcid":"https://orcid.org/0000-0003-0764-6579"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yulai Xie","raw_affiliation_strings":["Hitachi China Research Laboratory, Beijing, P. R. China"],"affiliations":[{"raw_affiliation_string":"Hitachi China Research Laboratory, Beijing, P. R. China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100354625","display_name":"Yang Zhang","orcid":"https://orcid.org/0000-0002-0523-8478"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Zhang","raw_affiliation_strings":["Hitachi China Research Laboratory, Beijing, P. R. China","School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, P. R. China"],"affiliations":[{"raw_affiliation_string":"Hitachi China Research Laboratory, Beijing, P. R. China","institution_ids":[]},{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, P. R. China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100434979","display_name":"Jinyu Zhang","orcid":"https://orcid.org/0000-0001-7341-6830"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinyu Zhang","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, P. R. China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, P. R. China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100349294","display_name":"Yanfei Zhang","orcid":"https://orcid.org/0000-0003-1088-6143"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanfei Zhang","raw_affiliation_strings":["Hitachi China Research Laboratory, Beijing, P. R. China","School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, P. R. China"],"affiliations":[{"raw_affiliation_string":"Hitachi China Research Laboratory, Beijing, P. R. China","institution_ids":[]},{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, P. R. China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100441099","display_name":"Lei Xiao","orcid":"https://orcid.org/0000-0003-0307-6206"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao Lei","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, P. R. China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, P. R. China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100652923","display_name":"Fang Ren","orcid":"https://orcid.org/0000-0002-2251-9220"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fang Ren","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, P. R. China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, P. R. China","institution_ids":["https://openalex.org/I92403157"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100652923"],"corresponding_institution_ids":["https://openalex.org/I92403157"],"apc_list":null,"apc_paid":null,"fwci":0.302,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.52820589,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"36","issue":"12","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8903641700744629},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8606566786766052},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6777336001396179},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.5771334767341614},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5686310529708862},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5339540243148804},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4795346260070801},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4738633632659912},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.4679136872291565},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4645867645740509},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4490334987640381},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.43631476163864136},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.13202223181724548},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.07946938276290894},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.07720744609832764}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8903641700744629},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8606566786766052},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6777336001396179},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5771334767341614},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5686310529708862},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5339540243148804},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4795346260070801},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4738633632659912},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.4679136872291565},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4645867645740509},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4490334987640381},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43631476163864136},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.13202223181724548},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.07946938276290894},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.07720744609832764},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s021800142255014x","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s021800142255014x","pdf_url":null,"source":{"id":"https://openalex.org/S41486457","display_name":"International Journal of Pattern Recognition and Artificial Intelligence","issn_l":"0218-0014","issn":["0218-0014","1793-6381"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Pattern Recognition and Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6143712807","display_name":null,"funder_award_id":"FRF-BD-20-11A","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W2064675550","https://openalex.org/W2133459682","https://openalex.org/W2139501017","https://openalex.org/W2183341477","https://openalex.org/W2250539671","https://openalex.org/W2508429489","https://openalex.org/W2519328139","https://openalex.org/W2526050071","https://openalex.org/W2739107216","https://openalex.org/W2747920239","https://openalex.org/W2755876276","https://openalex.org/W2766375149","https://openalex.org/W2768066693","https://openalex.org/W2785892019","https://openalex.org/W2883910824","https://openalex.org/W2885775891","https://openalex.org/W2899879331","https://openalex.org/W2951098185","https://openalex.org/W2951390634","https://openalex.org/W2953461088","https://openalex.org/W2962799512","https://openalex.org/W2962990649","https://openalex.org/W2962994439","https://openalex.org/W2963177403","https://openalex.org/W2963351113","https://openalex.org/W2963524571","https://openalex.org/W2963576560","https://openalex.org/W2963782415","https://openalex.org/W2963811641","https://openalex.org/W2963843052","https://openalex.org/W2963916161","https://openalex.org/W2968104955","https://openalex.org/W2972589507","https://openalex.org/W2982770724","https://openalex.org/W2985144848","https://openalex.org/W3034464851","https://openalex.org/W3035392611","https://openalex.org/W3043840704","https://openalex.org/W4212774754"],"related_works":["https://openalex.org/W2547835662","https://openalex.org/W4312845724","https://openalex.org/W4312545247","https://openalex.org/W4224046780","https://openalex.org/W4384210086","https://openalex.org/W4364297074","https://openalex.org/W4298201857","https://openalex.org/W4312266680","https://openalex.org/W4312939826","https://openalex.org/W2735824434"],"abstract_inverted_index":{"Multi-modal":[0],"dense":[1,36,89],"video":[2,37,56,90],"captioning":[3,91],"is":[4],"a":[5,17,50,68,75,105],"task":[6,92],"using":[7],"multiple":[8,95],"information":[9,62,117],"to":[10,63,85],"detect":[11],"all":[12],"meaningful":[13],"events":[14],"and":[15,100,115,171],"generate":[16],"textual":[18],"description":[19,66],"for":[20,67],"each":[21,134],"event.":[22],"The":[23,46],"existing":[24],"works":[25],"mainly":[26],"rely":[27],"on":[28,144],"single":[29],"visual":[30],"or":[31],"dual":[32],"audio-visual":[33],"modals":[34],"in":[35],"captioning,":[38],"while":[39],"completely":[40],"ignoring":[41],"the":[42,55,64,87,113,138,145,165],"text":[43,47,167],"modal":[44,48],"(subtitle).":[45],"has":[49],"similar":[51],"data":[52,172],"structure":[53],"as":[54],"captions,":[57],"which":[58,128],"provides":[59],"immediate":[60],"semantic":[61],"content":[65],"video.":[69],"In":[70],"this":[71],"paper,":[72],"we":[73,103,120],"propose":[74,121],"novel":[76],"framework,":[77],"called":[78],"Two-Stage":[79],"Cross-Modal":[80],"Encoding":[81],"Transformer":[82],"Network":[83],"(TS-CMETN),":[84],"realize":[86],"multi-modal":[88],"by":[93],"fusing":[94],"features,":[96],"including":[97],"audio,":[98],"visual,":[99],"text.":[101],"First,":[102],"design":[104],"two-stage":[106],"feature":[107],"fusion":[108],"encoder":[109],"that":[110,150],"hierarchically":[111],"achieves":[112,154],"intra-":[114],"inter-modal":[116],"interaction.":[118],"Second,":[119],"an":[122],"anchor-free":[123],"temporal":[124],"event":[125,131],"proposal":[126],"module,":[127],"efficiently":[129],"generates":[130],"proposals":[132],"at":[133,175],"time":[135],"step":[136],"without":[137],"complex":[139],"anchor":[140],"calculation.":[141],"Extensive":[142],"experiments":[143],"ActivityNet":[146],"Captions":[147],"dataset":[148],"show":[149],"our":[151,158],"proposed":[152],"framework":[153],"high":[155],"performance.":[156],"Moreover,":[157],"approach":[159],"can":[160],"adaptively":[161],"handle":[162],"cases":[163],"of":[164],"missing":[166],"modal.":[168],"Our":[169],"code":[170],"are":[173],"available":[174],"https://github.com/xieyulai/TM-CMETN":[176],".":[177]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
