{"id":"https://openalex.org/W4402915789","doi":"https://doi.org/10.1109/icip51287.2024.10647731","title":"Caseg: Clip-Based Action Segmentation With Learnable Text Prompt","display_name":"Caseg: Clip-Based Action Segmentation With Learnable Text Prompt","publication_year":2024,"publication_date":"2024-09-27","ids":{"openalex":"https://openalex.org/W4402915789","doi":"https://doi.org/10.1109/icip51287.2024.10647731"},"language":"en","primary_location":{"id":"doi:10.1109/icip51287.2024.10647731","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icip51287.2024.10647731","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Image Processing (ICIP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089549208","display_name":"Su-Yuan Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Suyuan Huang","raw_affiliation_strings":["Beihang University,Intelligent Computing and Machine Learning Lab, School of ASEE"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beihang University,Intelligent Computing and Machine Learning Lab, School of ASEE","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090469702","display_name":"Haoxin Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haoxin Zhang","raw_affiliation_strings":["Xiaohongshu Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Xiaohongshu Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036903821","display_name":"Yanyu Xu","orcid":"https://orcid.org/0000-0001-8926-7833"},"institutions":[{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yanyu Xu","raw_affiliation_strings":["Institute of High Performance Computing, A*Star"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of High Performance Computing, A*Star","institution_ids":["https://openalex.org/I3004594783"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108151876","display_name":"Yan Gao","orcid":"https://orcid.org/0009-0003-5405-5496"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan Gao","raw_affiliation_strings":["Xiaohongshu Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Xiaohongshu Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038805923","display_name":"Yao Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao Hu","raw_affiliation_strings":["Xiaohongshu Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Xiaohongshu Inc","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032405950","display_name":"Zengchang Qin","orcid":"https://orcid.org/0000-0002-8084-6721"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zengchang Qin","raw_affiliation_strings":["Beihang University,Intelligent Computing and Machine Learning Lab, School of ASEE"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beihang University,Intelligent Computing and Machine Learning Lab, School of ASEE","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1489696,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"1","issue":null,"first_page":"2201","last_page":"2207"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9567000269889832,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6693398952484131},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.593297004699707},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5292301177978516},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.438078910112381},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3641393780708313},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.13166135549545288}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6693398952484131},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.593297004699707},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5292301177978516},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.438078910112381},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3641393780708313},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.13166135549545288},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icip51287.2024.10647731","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icip51287.2024.10647731","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Image Processing (ICIP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5099999904632568}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2491875666","https://openalex.org/W2550143307","https://openalex.org/W2963524571","https://openalex.org/W2963853051","https://openalex.org/W2970476646","https://openalex.org/W3034373833","https://openalex.org/W3083550439","https://openalex.org/W3108772932","https://openalex.org/W3119038403","https://openalex.org/W3166363426","https://openalex.org/W3198377975","https://openalex.org/W3204193736","https://openalex.org/W4221166681","https://openalex.org/W4312310776","https://openalex.org/W4312982010","https://openalex.org/W4313055276","https://openalex.org/W4385245566","https://openalex.org/W6755207826","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6801567822","https://openalex.org/W6802442395"],"related_works":["https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2775347418","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Video":[0],"action":[1,53,95],"segmentation":[2,54,96],"aims":[3],"to":[4],"identify":[5],"and":[6,25,51,69,78,98,134,136],"localize":[7],"actions.":[8],"Existing":[9],"models":[10],"have":[11],"achieved":[12],"impressive":[13],"performance":[14,47],"with":[15],"pre-extracted":[16],"frame-level":[17],"features,":[18],"but":[19],"this":[20,36],"may":[21],"limit":[22],"zero-shot":[23],"learning":[24],"cross-dataset":[26],"inference,":[27],"especially":[28],"for":[29,45],"new":[30],"actions":[31],"or":[32],"scenes.":[33],"To":[34],"overcome":[35],"problem,":[37],"we":[38],"propose":[39],"a":[40,59,70],"novel":[41,52],"end-to-end":[42],"network":[43],"designed":[44],"robust":[46],"across":[48],"both":[49],"familiar":[50],"scenarios.":[55],"Our":[56,86],"approach":[57],"combines":[58],"plug-and-play":[60],"visual":[61],"prompt":[62,73],"module":[63],"enhancing":[64],"CLIP":[65,90,109],"features\u2019":[66],"temporal":[67],"understanding,":[68],"learnable":[71],"text":[72],"that":[74,89,108,113,140],"enriches":[75],"label":[76],"semantics":[77],"refines":[79],"the":[80,120,137,141],"model\u2019s":[81],"focus,":[82],"significantly":[83],"boosting":[84],"performance.":[85],"results":[87,138],"demonstrate":[88],"features":[91,110,115],"can":[92,100],"assist":[93],"in":[94],"tasks,":[97],"prompts":[99],"improve":[101],"task":[102],"effectiveness.":[103],"Furthermore,":[104],"our":[105],"findings":[106],"show":[107,139],"contain":[111],"information":[112],"i3d":[114],"do":[116],"not.":[117],"We":[118],"evaluate":[119],"proposed":[121,142],"method":[122],"on":[123],"several":[124],"video":[125],"datasets,":[126],"including":[127],"Georgia":[128],"Tech":[129],"Egocentric":[130],"Activities":[131],"(GTEA),":[132],"50Salads,":[133],"Breakfast,":[135],"model":[143],"outperforms":[144],"existing":[145],"SOTA":[146],"models.":[147]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
