{"id":"https://openalex.org/W4402981118","doi":"https://doi.org/10.1109/icme57554.2024.10687820","title":"Spot the Difference! Temporal Coarse to Fine to Finer Difference Spotting for Action Recognition in Videos","display_name":"Spot the Difference! Temporal Coarse to Fine to Finer Difference Spotting for Action Recognition in Videos","publication_year":2024,"publication_date":"2024-07-15","ids":{"openalex":"https://openalex.org/W4402981118","doi":"https://doi.org/10.1109/icme57554.2024.10687820"},"language":"en","primary_location":{"id":"doi:10.1109/icme57554.2024.10687820","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687820","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023720727","display_name":"Yaoxin Li","orcid":"https://orcid.org/0000-0002-4737-4427"},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Yaoxin Li","raw_affiliation_strings":["University of Waterloo"],"affiliations":[{"raw_affiliation_string":"University of Waterloo","institution_ids":["https://openalex.org/I151746483"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112608090","display_name":"Deepak Sridhar","orcid":"https://orcid.org/0000-0003-4395-7366"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Deepak Sridhar","raw_affiliation_strings":["University of California,San Diego"],"affiliations":[{"raw_affiliation_string":"University of California,San Diego","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101527001","display_name":"Hanwen Liang","orcid":"https://orcid.org/0000-0002-9892-752X"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Hanwen Liang","raw_affiliation_strings":["University of Toronto"],"affiliations":[{"raw_affiliation_string":"University of Toronto","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042055103","display_name":"Alexander Wong","orcid":null},"institutions":[{"id":"https://openalex.org/I151746483","display_name":"University of Waterloo","ror":"https://ror.org/01aff2v68","country_code":"CA","type":"education","lineage":["https://openalex.org/I151746483"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Alexander Wong","raw_affiliation_strings":["University of Waterloo"],"affiliations":[{"raw_affiliation_string":"University of Waterloo","institution_ids":["https://openalex.org/I151746483"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5023720727"],"corresponding_institution_ids":["https://openalex.org/I151746483"],"apc_list":null,"apc_paid":null,"fwci":0.275,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.54469932,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.9751999974250793,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.9585020542144775},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6005277037620544},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5419401526451111},{"id":"https://openalex.org/keywords/significant-difference","display_name":"Significant difference","score":0.5070474147796631},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4630231261253357},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.4249940514564514},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.4187811613082886},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.339703768491745},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3376297652721405},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.20846593379974365},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.1568886637687683},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.08724462985992432},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.05564168095588684}],"concepts":[{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.9585020542144775},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6005277037620544},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5419401526451111},{"id":"https://openalex.org/C3018023364","wikidata":"https://www.wikidata.org/wiki/Q425265","display_name":"Significant difference","level":2,"score":0.5070474147796631},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4630231261253357},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.4249940514564514},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.4187811613082886},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.339703768491745},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3376297652721405},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20846593379974365},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1568886637687683},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.08724462985992432},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.05564168095588684},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme57554.2024.10687820","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687820","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2990152177","https://openalex.org/W3010010212","https://openalex.org/W3035413240","https://openalex.org/W3035524453","https://openalex.org/W4214727094","https://openalex.org/W4312266966","https://openalex.org/W4312302951","https://openalex.org/W4312560592","https://openalex.org/W4382467086","https://openalex.org/W4386065852"],"related_works":["https://openalex.org/W2034439647","https://openalex.org/W3184921334","https://openalex.org/W4249589822","https://openalex.org/W2103063669","https://openalex.org/W4380551034","https://openalex.org/W2033009170","https://openalex.org/W4386895402","https://openalex.org/W3202382261","https://openalex.org/W1576128429","https://openalex.org/W2269464716"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,65,83,104,134],"present":[4],"a":[5,53,58,67,97,106,118,139],"novel":[6,100,119,140],"difference-spotting":[7],"strategy":[8],"for":[9],"video":[10],"action":[11,48,79,89,153],"recognition":[12,80],"inspired":[13],"by":[14,19,42],"the":[15,20,25,32,111,115,124,130,145,174,189,212],"cognitive":[16],"challenges":[17],"posed":[18],"childhood":[21],"puzzle":[22],"game":[23],"\"Spot":[24],"Difference\".":[26],"Our":[27],"approach":[28],"aims":[29],"to":[30,35],"enhance":[31],"model\u2019s":[33],"capability":[34],"capture":[36],"time-series":[37],"variation":[38],"and":[39,49,90,164,199],"intricate":[40],"details":[41],"gradually":[43],"integrating":[44],"distinctive":[45],"information":[46,87],"between":[47,126],"non-action":[50,91],"segments":[51,92,128],"in":[52,96,110,211],"temporal":[54],"\"coarse-to-fine-to-finer\"":[55],"manner":[56],"within":[57,129,151],"discriminative":[59,69,86],"learning":[60,70,121],"framework.":[61],"To":[62],"achieve":[63,182],"this,":[64],"propose":[66],"model-agnostic":[68],"mechanism":[71],"that":[72],"can":[73],"be":[74],"easily":[75],"integrated":[76],"into":[77],"existing":[78],"networks.":[81],"Firstly,":[82],"incorporate":[84,135],"coarse-level":[85],"of":[88,114,147,176],"across":[93],"all":[94],"videos":[95],"corpus":[98],"using":[99],"booster":[101],"nets.":[102],"Secondly,":[103],"introduce":[105],"fine-level":[107],"discrimination":[108,137],"objective":[109],"penultimate":[112],"layer":[113],"network":[116],"through":[117,138],"contrastive":[120],"approach,":[122],"increasing":[123],"distinction":[125,146],"different":[127,148],"same":[131],"video.":[132],"Lastly,":[133],"finer":[136],"clip":[141],"matching":[142],"mechanism,":[143],"enhancing":[144],"consecutive":[149],"clips":[150],"an":[152],"segment.":[154],"Experimental":[155],"results":[156,195,209],"on":[157,196],"multiple":[158],"benchmark":[159],"datasets":[160],"(ActivityNet,":[161],"HACS,":[162],"FineAction)":[163],"backbone":[165],"architectures":[166],"(TSN,":[167],"TSM,":[168],"TANet,":[169],"TPN,":[170],"Timesformer,":[171],"VideoSwin)":[172],"demonstrate":[173],"effectiveness":[175],"our":[177,204],"proposed":[178],"mechanism.":[179],"We":[180],"consistently":[181],"significant":[183],"improvements":[184],"(0.33":[185],"-":[186],"4%)":[187],"over":[188],"baselines,":[190],"with":[191],"competitive":[192],"single":[193],"crop":[194],"ActivityNet":[197,213],"(87.9%)":[198],"HACS":[200],"(90.21%)":[201],"datasets.":[202],"Moreover,":[203],"technique":[205],"achieves":[206],"stateof-the-art":[207],"classifier":[208],"(94.8%)":[210],"2022":[214],"challenge\u2019s":[215],"validation":[216],"set.":[217]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
