{"id":"https://openalex.org/W4392903109","doi":"https://doi.org/10.1109/icassp48485.2024.10446233","title":"Modal Consensus and Contextual Separation for Weakly Supervised Temporal Action Localization","display_name":"Modal Consensus and Contextual Separation for Weakly Supervised Temporal Action Localization","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903109","doi":"https://doi.org/10.1109/icassp48485.2024.10446233"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446233","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446233","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100346920","display_name":"Peng Liu","orcid":"https://orcid.org/0009-0003-1816-1670"},"institutions":[{"id":"https://openalex.org/I143413998","display_name":"Qingdao University of Science and Technology","ror":"https://ror.org/041j8js14","country_code":"CN","type":"education","lineage":["https://openalex.org/I143413998"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Peng Liu","raw_affiliation_strings":["Qingdao University of Science and Technology,Qingdao,China,266061"],"affiliations":[{"raw_affiliation_string":"Qingdao University of Science and Technology,Qingdao,China,266061","institution_ids":["https://openalex.org/I143413998"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079533226","display_name":"Chuanxu Wang","orcid":"https://orcid.org/0000-0002-3004-0738"},"institutions":[{"id":"https://openalex.org/I143413998","display_name":"Qingdao University of Science and Technology","ror":"https://ror.org/041j8js14","country_code":"CN","type":"education","lineage":["https://openalex.org/I143413998"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chuanxu Wang","raw_affiliation_strings":["Qingdao University of Science and Technology,Qingdao,China,266061"],"affiliations":[{"raw_affiliation_string":"Qingdao University of Science and Technology,Qingdao,China,266061","institution_ids":["https://openalex.org/I143413998"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101237143","display_name":"Min Zhao","orcid":"https://orcid.org/0009-0002-4787-5835"},"institutions":[{"id":"https://openalex.org/I143413998","display_name":"Qingdao University of Science and Technology","ror":"https://ror.org/041j8js14","country_code":"CN","type":"education","lineage":["https://openalex.org/I143413998"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Zhao","raw_affiliation_strings":["Qingdao University of Science and Technology,Qingdao,China,266061"],"affiliations":[{"raw_affiliation_string":"Qingdao University of Science and Technology,Qingdao,China,266061","institution_ids":["https://openalex.org/I143413998"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100346920"],"corresponding_institution_ids":["https://openalex.org/I143413998"],"apc_list":null,"apc_paid":null,"fwci":0.2632,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.46872275,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"4220","last_page":"4224"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7973716259002686},{"id":"https://openalex.org/keywords/optical-flow","display_name":"Optical flow","score":0.7179961800575256},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6354668140411377},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6336095333099365},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6019441485404968},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5918216109275818},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5398601293563843},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.48821333050727844},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.43825897574424744},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41591787338256836},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.36574089527130127},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.12728285789489746}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7973716259002686},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.7179961800575256},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6354668140411377},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6336095333099365},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6019441485404968},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5918216109275818},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5398601293563843},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.48821333050727844},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.43825897574424744},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41591787338256836},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.36574089527130127},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.12728285789489746},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446233","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446233","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320330944","display_name":"Nature","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1927052826","https://openalex.org/W2006623133","https://openalex.org/W2133515615","https://openalex.org/W2295107390","https://openalex.org/W2336403884","https://openalex.org/W2883286874","https://openalex.org/W2919974746","https://openalex.org/W2962876901","https://openalex.org/W2963524571","https://openalex.org/W2981998043","https://openalex.org/W2986407524","https://openalex.org/W3034623254","https://openalex.org/W3045623285","https://openalex.org/W3149230594","https://openalex.org/W3173698268","https://openalex.org/W3207927851","https://openalex.org/W4226500165","https://openalex.org/W4285146284","https://openalex.org/W4321609205","https://openalex.org/W4386071527","https://openalex.org/W6631190155","https://openalex.org/W6793520193"],"related_works":["https://openalex.org/W2023355163","https://openalex.org/W4386083130","https://openalex.org/W2069571255","https://openalex.org/W3111737715","https://openalex.org/W2117442182","https://openalex.org/W2081707527","https://openalex.org/W1975907365","https://openalex.org/W3125517176","https://openalex.org/W2062195135","https://openalex.org/W2163164795"],"abstract_inverted_index":{"Weakly-supervised":[0],"Temporal":[1],"Action":[2],"Localization":[3],"(W-TAL)":[4],"is":[5,122],"a":[6],"challenging":[7],"task":[8],"aiming":[9],"to":[10,28,63,73,89,135],"achieve":[11],"both":[12],"action":[13,43,75,114,130],"class":[14],"identification":[15],"and":[16,35,45,58,81,110,141,158],"localization":[17,44],"of":[18,33,113,148,169],"temporal":[19,111],"boundaries":[20],"using":[21],"video-level":[22],"label":[23],"learning.":[24],"Recent":[25],"methods":[26],"resort":[27],"basic":[29],"cascading":[30],"or":[31],"integration":[32],"appearance":[34,80],"optical":[36,82],"flow":[37,83],"features,":[38],"often":[39],"resulting":[40],"in":[41,164],"incomplete":[42],"ambiguity":[46],"distinguishing":[47],"foreground":[48,140],"from":[49],"background.":[50,142],"Therefore,":[51],"this":[52],"paper":[53,144],"introduces":[54],"the":[55,68,102,118,137,146,149,156,166],"Modal":[56],"Consensus":[57],"Context":[59],"Separation":[60],"(MCCS)":[61],"approach":[62],"address":[64],"these":[65,94],"complexities.":[66],"First,":[67],"modal":[69],"collaboration":[70],"module":[71],"proposes":[72],"enhance":[74],"feature":[76],"representation":[77],"by":[78],"synergizing":[79],"features":[84,134],"while":[85],"discarding":[86],"redundant":[87],"elements":[88],"eschew":[90],"suboptimal":[91],"outcomes.":[92],"Further,":[93],"augmented":[95],"bimodal":[96],"streams":[97],"are":[98],"meticulously":[99],"fused":[100],"via":[101],"spatiotemporal":[103],"self-attention":[104],"module,":[105],"which":[106],"adeptly":[107],"fuses":[108],"spatial":[109],"relationships":[112],"snippets.":[115],"In":[116],"addition,":[117],"hybrid":[119,133],"modeling":[120],"mechanism":[121],"employed":[123],"for":[124],"foreground-background":[125],"separation,":[126],"focusing":[127],"on":[128,155],"local":[129],"attributes":[131],"within":[132],"refine":[136],"differentiation":[138],"between":[139],"This":[143],"substantiates":[145],"efficacy":[147],"MCCS":[150],"method":[151],"through":[152],"rigorous":[153],"testing":[154],"THUMOS14":[157],"ActivityNet1.3":[159],"datasets,":[160],"demonstrating":[161],"its":[162],"superiority":[163],"tackling":[165],"intricate":[167],"facets":[168],"W-TAL.":[170]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
