{"id":"https://openalex.org/W3206261212","doi":"https://doi.org/10.1145/3474085.3475534","title":"Cascade Cross-modal Attention Network for Video Actor and Action Segmentation from a Sentence","display_name":"Cascade Cross-modal Attention Network for Video Actor and Action Segmentation from a Sentence","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3206261212","doi":"https://doi.org/10.1145/3474085.3475534","mag":"3206261212"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475534","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475534","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100357392","display_name":"Weidong Chen","orcid":"https://orcid.org/0000-0003-2774-2875"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weidong Chen","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100775216","display_name":"Guorong Li","orcid":"https://orcid.org/0000-0003-3954-2387"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guorong Li","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055937409","display_name":"Xinfeng Zhang","orcid":"https://orcid.org/0000-0002-7517-3868"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinfeng Zhang","raw_affiliation_strings":["University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058231829","display_name":"Hongyang Yu","orcid":"https://orcid.org/0000-0003-0036-531X"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongyang Yu","raw_affiliation_strings":["Peng Cheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100669242","display_name":"Shuhui Wang","orcid":"https://orcid.org/0000-0002-5931-0527"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuhui Wang","raw_affiliation_strings":["Institute of Computing Technology Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028597017","display_name":"Qingming Huang","orcid":"https://orcid.org/0000-0001-7542-296X"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingming Huang","raw_affiliation_strings":["University of Chinese Academy of Sciences &amp; Institute of Computing Technology Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences &amp; Institute of Computing Technology Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100357392"],"corresponding_institution_ids":["https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.7686,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.73948138,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4053","last_page":"4062"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8746383190155029},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8269824981689453},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.7878004908561707},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7411245107650757},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.630300760269165},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5743856430053711},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4927676022052765},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.45838209986686707},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.44286566972732544},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.41805899143218994},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4165894389152527},{"id":"https://openalex.org/keywords/semantic-matching","display_name":"Semantic matching","score":0.414841890335083},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4109647274017334},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.36787039041519165}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8746383190155029},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8269824981689453},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.7878004908561707},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7411245107650757},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.630300760269165},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5743856430053711},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4927676022052765},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.45838209986686707},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.44286566972732544},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.41805899143218994},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4165894389152527},{"id":"https://openalex.org/C2778493491","wikidata":"https://www.wikidata.org/wiki/Q7449072","display_name":"Semantic matching","level":3,"score":0.414841890335083},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4109647274017334},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36787039041519165},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3474085.3475534","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475534","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.7200000286102295}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W1536680647","https://openalex.org/W1861492603","https://openalex.org/W1905722737","https://openalex.org/W2117539524","https://openalex.org/W2153579005","https://openalex.org/W2194775991","https://openalex.org/W2251512949","https://openalex.org/W2302548814","https://openalex.org/W2463565445","https://openalex.org/W2583360688","https://openalex.org/W2592388817","https://openalex.org/W2599765304","https://openalex.org/W2602753196","https://openalex.org/W2611788449","https://openalex.org/W2734973448","https://openalex.org/W2747053578","https://openalex.org/W2784458614","https://openalex.org/W2790888757","https://openalex.org/W2792152979","https://openalex.org/W2804243936","https://openalex.org/W2894964039","https://openalex.org/W2938603906","https://openalex.org/W2950178297","https://openalex.org/W2950728047","https://openalex.org/W2951548327","https://openalex.org/W2952204649","https://openalex.org/W2955058313","https://openalex.org/W2960655175","https://openalex.org/W2962764817","https://openalex.org/W2963017553","https://openalex.org/W2963091558","https://openalex.org/W2963094665","https://openalex.org/W2963109634","https://openalex.org/W2963150697","https://openalex.org/W2963449390","https://openalex.org/W2963524571","https://openalex.org/W2963560969","https://openalex.org/W2963843782","https://openalex.org/W2963954913","https://openalex.org/W2963971014","https://openalex.org/W2964067226","https://openalex.org/W2964089981","https://openalex.org/W2964286567","https://openalex.org/W2964345792","https://openalex.org/W2970950121","https://openalex.org/W2980088508","https://openalex.org/W2981587852","https://openalex.org/W2981663434","https://openalex.org/W2983693499","https://openalex.org/W2984121207","https://openalex.org/W2987401211","https://openalex.org/W2997063389","https://openalex.org/W3006987161","https://openalex.org/W3033406713","https://openalex.org/W3034777757","https://openalex.org/W3034804856","https://openalex.org/W3035365026","https://openalex.org/W3093314701","https://openalex.org/W3093355642","https://openalex.org/W3095718427","https://openalex.org/W4236965008"],"related_works":["https://openalex.org/W2965546495","https://openalex.org/W4389116644","https://openalex.org/W2153315159","https://openalex.org/W3103844505","https://openalex.org/W259157601","https://openalex.org/W4205463238","https://openalex.org/W2110523656","https://openalex.org/W1482209366","https://openalex.org/W2521627374","https://openalex.org/W2981954115"],"abstract_inverted_index":{"In":[0],"this":[1,88],"paper,":[2],"we":[3,111,178],"address":[4],"the":[5,10,16,20,29,34,37,58,61,65,68,71,74,77,82,91,95,100,104,132,159,169,173,197],"problem":[6],"that":[7,192],"selectively":[8],"segments":[9],"actor":[11],"and":[12,54,63,108,146,166,186],"its":[13],"action":[14],"in":[15,47,87],"video":[17,35],"clip":[18],"given":[19],"sentence":[21,62,75,92],"description.":[22],"The":[23,84],"main":[24],"challenge":[25],"is":[26,50],"to":[27,51,93,117,123,128,130],"match":[28],"local":[30],"semantic":[31],"features":[32,98,122],"of":[33,60,67,73,81,99,106,164,175],"with":[36,141],"heterogeneous":[38],"textual":[39],"features.":[40,136],"A":[41],"widely":[42],"used":[43],"language":[44,125,135],"processing":[45],"method":[46,145,194],"previous":[48],"works":[49],"leverage":[52,118],"bi-LSTM":[53],"self-attention,":[55],"which":[56],"fixed":[57],"attention":[59,72,116],"neglected":[64],"personality":[66],"video,":[69,101],"leading":[70],"mismatch":[76],"most":[78,96],"discriminative":[79,97,133],"feature":[80],"video.":[83],"proposed":[85,155],"algorithm":[86],"paper":[89],"allows":[90],"learn":[94],"remarkably":[102],"improving":[103,168],"accuracy":[105],"matching":[107],"segmentation.":[109],"Specifically,":[110],"propose":[112],"a":[113,142,147],"cascade":[114],"cross-modal":[115],"two":[119,182],"perspectives":[120],"visual":[121],"attend":[124],"from":[126,157,162],"coarse":[127],"fine":[129],"generate":[131],"vision-aware":[134],"Moreover,":[137],"equipping":[138],"our":[139,154,176,193],"framework":[140],"contrastive":[143],"learning":[144],"designed":[148],"hard":[149],"negative":[150],"mining":[151],"strategy":[152],"benefits":[153],"network":[156],"identifying":[158],"positive":[160],"sample":[161],"numbers":[163],"negatives,":[165],"further":[167],"performance.":[170],"To":[171],"demonstrate":[172],"effectiveness":[174],"approach,":[177],"conduct":[179],"experiments":[180],"on":[181],"datasets:":[183],"A2D":[184],"Sentences":[185],"J-HMDB":[187],"Sentences.":[188],"Experimental":[189],"results":[190],"show":[191],"significantly":[195],"improves":[196],"performance":[198],"over":[199],"recent":[200],"state-of-the-art":[201],"methods.":[202]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":5}],"updated_date":"2026-02-27T16:54:17.756197","created_date":"2025-10-10T00:00:00"}
