{"id":"https://openalex.org/W7108213551","doi":"https://doi.org/10.1109/tcsvt.2025.3639310","title":"CL-WTAL: Weakly-Supervised Temporal Complex Action Localization Based on Multi-Scale Contrast Learning","display_name":"CL-WTAL: Weakly-Supervised Temporal Complex Action Localization Based on Multi-Scale Contrast Learning","publication_year":2025,"publication_date":"2025-12-02","ids":{"openalex":"https://openalex.org/W7108213551","doi":"https://doi.org/10.1109/tcsvt.2025.3639310"},"language":null,"primary_location":{"id":"doi:10.1109/tcsvt.2025.3639310","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3639310","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Weili Ding","orcid":"https://orcid.org/0000-0003-1671-7789"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weili Ding","raw_affiliation_strings":["Key Laboratory of Intelligent Rehabilitation and Neromodulation of Hebei Province, Yanshan University, Qinhuangdao, China"],"raw_orcid":"https://orcid.org/0000-0003-1671-7789","affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Rehabilitation and Neromodulation of Hebei Province, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yu Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Zhang","raw_affiliation_strings":["Key Laboratory of Intelligent Rehabilitation and Neromodulation of Hebei Province, Yanshan University, Qinhuangdao, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Rehabilitation and Neromodulation of Hebei Province, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Lingyun Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lingyun Yang","raw_affiliation_strings":["Key Laboratory of Intelligent Rehabilitation and Neromodulation of Hebei Province, Yanshan University, Qinhuangdao, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Rehabilitation and Neromodulation of Hebei Province, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]},{"author_position":"last","author":{"id":null,"display_name":"Shuo Hu","orcid":"https://orcid.org/0000-0003-4741-7578"},"institutions":[{"id":"https://openalex.org/I39333907","display_name":"Yanshan University","ror":"https://ror.org/02txfnf15","country_code":"CN","type":"education","lineage":["https://openalex.org/I39333907"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuo Hu","raw_affiliation_strings":["Key Laboratory of Intelligent Rehabilitation and Neromodulation of Hebei Province, Yanshan University, Qinhuangdao, China"],"raw_orcid":"https://orcid.org/0000-0003-4741-7578","affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Rehabilitation and Neromodulation of Hebei Province, Yanshan University, Qinhuangdao, China","institution_ids":["https://openalex.org/I39333907"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I39333907"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.57292541,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"36","issue":"4","first_page":"4900","last_page":"4912"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.003700000001117587,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.00139999995008111,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dynamic-time-warping","display_name":"Dynamic time warping","score":0.6722000241279602},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.5626999735832214},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5602999925613403},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.47929999232292175},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.47380000352859497},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4562000036239624},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.42899999022483826},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4129999876022339},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.41019999980926514},{"id":"https://openalex.org/keywords/similarity-measure","display_name":"Similarity measure","score":0.4036000072956085}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7670999765396118},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7405999898910522},{"id":"https://openalex.org/C88516994","wikidata":"https://www.wikidata.org/wiki/Q1268863","display_name":"Dynamic time warping","level":2,"score":0.6722000241279602},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.5626999735832214},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5602999925613403},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.47929999232292175},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.47380000352859497},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4562000036239624},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.42899999022483826},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4129999876022339},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.41019999980926514},{"id":"https://openalex.org/C2776517306","wikidata":"https://www.wikidata.org/wiki/Q29017317","display_name":"Similarity measure","level":2,"score":0.4036000072956085},{"id":"https://openalex.org/C157202957","wikidata":"https://www.wikidata.org/wiki/Q1659609","display_name":"Image warping","level":2,"score":0.39489999413490295},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38839998841285706},{"id":"https://openalex.org/C102392041","wikidata":"https://www.wikidata.org/wiki/Q592860","display_name":"Sliding window protocol","level":3,"score":0.37929999828338623},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.3785000145435333},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.35510000586509705},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.3310000002384186},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.3253999948501587},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3156000077724457},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.3070000112056732},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.3059000074863434},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.3034999966621399},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2872999906539917},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.2838999927043915},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.2614000141620636},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2563999891281128},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3639310","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3639310","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.46652233600616455}],"awards":[{"id":"https://openalex.org/G1748874642","display_name":null,"funder_award_id":"246Z1804G","funder_id":"https://openalex.org/F4320322437","funder_display_name":"National Science and Technology Development Agency"},{"id":"https://openalex.org/G2840487366","display_name":null,"funder_award_id":"62073279","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5417780894","display_name":null,"funder_award_id":"U22A2050","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322437","display_name":"National Science and Technology Development Agency","ror":"https://ror.org/04vy95b61"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Temporal":[0],"action":[1,53,80,125,141],"localization":[2,54],"in":[3,13],"long-term":[4],"untrimmed":[5],"videos":[6],"remains":[7],"a":[8,38],"critical":[9],"yet":[10],"challenging":[11],"task":[12],"video":[14,134],"understanding,":[15],"with":[16,146],"existing":[17],"methods":[18],"often":[19],"relying":[20],"on":[21,132],"anchor-based":[22],"or":[23],"fully-supervised":[24],"frameworks":[25],"that":[26,108,137],"incur":[27],"heavy":[28],"computation":[29],"and":[30,48,55,98,112,143],"require":[31],"labor-intensive":[32],"frame-level":[33],"annotations.":[34],"This":[35],"paper":[36],"presents":[37],"novel":[39],"weakly-supervised":[40,151],"approach,":[41],"CL-WTAL,":[42],"which":[43],"leverages":[44],"multi-scale":[45,65],"contrast":[46],"learning":[47],"graph":[49,85],"convolution":[50,86],"for":[51],"accurate":[52],"recognition.":[56],"The":[57],"method":[58],"comprises":[59],"three":[60],"key":[61],"components:":[62],"(i)":[63],"A":[64,83,102],"sliding":[66],"window":[67],"mechanism":[68],"(long/normal/short":[69],"sequences)":[70],"to":[71,78,89,118],"segment":[72],"sub-actions":[73],"from":[74],"complex":[75],"videos,":[76],"adapting":[77],"diverse":[79],"durations;":[81],"(ii)":[82],"spatio-temporal":[84],"network":[87],"(ST-RGCN)":[88],"extract":[90],"skeletal":[91],"feature":[92,120],"vectors,":[93],"integrating":[94],"human":[95],"motion":[96],"dynamics":[97],"environmental":[99],"context;":[100],"(iii)":[101],"contrastive":[103],"learning-based":[104],"similarity":[105,111],"evaluation":[106],"framework":[107],"combines":[109],"cosine":[110],"Dynamic":[113],"Time":[114],"Warping":[115],"(DTW)":[116],"distance":[117],"measure":[119],"vector":[121],"relationships,":[122],"enabling":[123],"precise":[124],"boundary":[126],"detection":[127],"without":[128],"extensive":[129],"fine-tuning.":[130],"Experiments":[131],"daily-life":[133],"datasets":[135],"demonstrate":[136],"CL-WTAL":[138],"effectively":[139],"localizes":[140],"intervals":[142],"classifies":[144],"actions":[145],"high":[147],"accuracy,":[148],"outperforming":[149],"state-of-the-art":[150],"methods.":[152]},"counts_by_year":[],"updated_date":"2026-04-07T06:01:17.266235","created_date":"2025-12-03T00:00:00"}
