{"id":"https://openalex.org/W4408353384","doi":"https://doi.org/10.1109/icassp49660.2025.10889703","title":"Granularity-Aware Contrastive Learning for Fine-Grained Action Recognition","display_name":"Granularity-Aware Contrastive Learning for Fine-Grained Action Recognition","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408353384","doi":"https://doi.org/10.1109/icassp49660.2025.10889703"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10889703","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889703","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013248684","display_name":"Hailun Zhang","orcid":"https://orcid.org/0000-0001-9818-3332"},"institutions":[{"id":"https://openalex.org/I24185976","display_name":"Sichuan University","ror":"https://ror.org/011ashp19","country_code":"CN","type":"education","lineage":["https://openalex.org/I24185976"]},{"id":"https://openalex.org/I4210125143","display_name":"Chengdu University","ror":"https://ror.org/034z67559","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210125143"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hailun Zhang","raw_affiliation_strings":["Sichuan University,College of Computer Science,Chengdu,China"],"affiliations":[{"raw_affiliation_string":"Sichuan University,College of Computer Science,Chengdu,China","institution_ids":["https://openalex.org/I4210125143","https://openalex.org/I24185976"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100768845","display_name":"Xinrui Wang","orcid":"https://orcid.org/0000-0001-7003-4917"},"institutions":[{"id":"https://openalex.org/I4210125143","display_name":"Chengdu University","ror":"https://ror.org/034z67559","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210125143"]},{"id":"https://openalex.org/I24185976","display_name":"Sichuan University","ror":"https://ror.org/011ashp19","country_code":"CN","type":"education","lineage":["https://openalex.org/I24185976"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinrui Wang","raw_affiliation_strings":["Sichuan University,College of Computer Science,Chengdu,China"],"affiliations":[{"raw_affiliation_string":"Sichuan University,College of Computer Science,Chengdu,China","institution_ids":["https://openalex.org/I4210125143","https://openalex.org/I24185976"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085914001","display_name":"Qijun Zhao","orcid":"https://orcid.org/0000-0003-4651-7163"},"institutions":[{"id":"https://openalex.org/I4210125143","display_name":"Chengdu University","ror":"https://ror.org/034z67559","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210125143"]},{"id":"https://openalex.org/I24185976","display_name":"Sichuan University","ror":"https://ror.org/011ashp19","country_code":"CN","type":"education","lineage":["https://openalex.org/I24185976"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qijun Zhao","raw_affiliation_strings":["Sichuan University,College of Computer Science,Chengdu,China"],"affiliations":[{"raw_affiliation_string":"Sichuan University,College of Computer Science,Chengdu,China","institution_ids":["https://openalex.org/I4210125143","https://openalex.org/I24185976"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5013248684"],"corresponding_institution_ids":["https://openalex.org/I24185976","https://openalex.org/I4210125143"],"apc_list":null,"apc_paid":null,"fwci":2.6381,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.88358744,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.9778000116348267,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.7964785099029541},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7890208959579468},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5285643339157104},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47167691588401794},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.46925637125968933},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3510274887084961},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3262014389038086},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10293948650360107}],"concepts":[{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.7964785099029541},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7890208959579468},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5285643339157104},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47167691588401794},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.46925637125968933},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3510274887084961},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3262014389038086},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10293948650360107},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10889703","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889703","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W2337252826","https://openalex.org/W2625366777","https://openalex.org/W2895243423","https://openalex.org/W2977341128","https://openalex.org/W2986674040","https://openalex.org/W2990152177","https://openalex.org/W2990503944","https://openalex.org/W3094502228","https://openalex.org/W3151130473","https://openalex.org/W3206930349","https://openalex.org/W4214612132","https://openalex.org/W4312480274","https://openalex.org/W4312644729","https://openalex.org/W4386159789","https://openalex.org/W4388854793","https://openalex.org/W6749916090","https://openalex.org/W6755977528","https://openalex.org/W6759579507","https://openalex.org/W6780226713","https://openalex.org/W6791353385","https://openalex.org/W6797263693","https://openalex.org/W6846313647","https://openalex.org/W6849878650"],"related_works":["https://openalex.org/W2931688134","https://openalex.org/W2377919138","https://openalex.org/W2378857091","https://openalex.org/W103652678","https://openalex.org/W4226090359","https://openalex.org/W2059697060","https://openalex.org/W936373746","https://openalex.org/W1576128429","https://openalex.org/W4401071206","https://openalex.org/W2269464716"],"abstract_inverted_index":{"The":[0],"contrastive":[1,152],"learning":[2,153],"paradigm":[3],"has":[4],"been":[5],"widely":[6],"used":[7],"for":[8,154],"image-language":[9],"pre-training":[10],"and":[11,47,84,128,139,169,172,178],"extended":[12],"to":[13,19,73,104,150,201],"video-text":[14],"tuning.":[15],"These":[16],"approaches":[17],"aim":[18],"maximize":[20],"the":[21,43,53,91,96,106,110,122,126,144,174,194],"similarity":[22],"between":[23,181],"positive":[24,46],"sample":[25,83,107,127],"pairs":[26,49,102],"while":[27],"minimizing":[28],"that":[29,86],"of":[30,45,55,112,137,166,176,189,196],"negative":[31,48,101],"ones":[32],"through":[33,161],"an":[34],"alignment":[35,170],"objective.":[36],"Their":[37],"performance":[38],"is":[39,60,159],"highly":[40],"affected":[41],"by":[42],"definition":[44,165],"which":[50],"depends":[51],"on":[52,186],"granularity":[54],"label":[56,99,131],"classification.":[57],"This":[58,158],"effect":[59],"particularly":[61],"apparent":[62],"in":[63],"video":[64,82],"action":[65,156,191],"recognition,":[66],"where":[67],"different":[68],"fine-grained":[69,92,140,155,179,190],"actions":[70],"may":[71],"belong":[72],"a":[74,81,134,163],"shared":[75],"coarse":[76,98,115,138,177],"label.":[77],"Therefore,":[78],"indiscriminately":[79],"treating":[80],"labels":[85],"are":[87],"not":[88],"identical":[89],"at":[90],"level":[93],"but":[94],"share":[95],"same":[97],"as":[100],"leads":[103],"pushing":[105],"apart":[108],"from":[109,124],"cluster":[111],"its":[113,129],"basic":[114],"action.":[116],"Such":[117],"conflict":[118],"can":[119],"potentially":[120],"prevent":[121],"model":[123],"pulling":[125],"target":[130],"closer.":[132],"For":[133],"balanced":[135],"understanding":[136],"distinctions,":[141],"we":[142],"propose":[143],"Granularity-Aware":[145],"Contrastive":[146],"Learning":[147],"(GACon)":[148],"framework":[149],"improve":[151],"recognition.":[157],"achieved":[160],"(i)":[162],"refined":[164],"sample-label":[167],"relation":[168],"objectives,":[171],"(ii)":[173],"exchange":[175],"information":[180],"two":[182],"granularity-distinct":[183],"experts.":[184],"Experiments":[185],"four":[187],"benchmarks":[188],"recognition":[192],"show":[193],"superiority":[195],"our":[197],"proposed":[198],"GACon":[199],"compared":[200],"existing":[202],"approaches.":[203]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
