{"id":"https://openalex.org/W3162041249","doi":"https://doi.org/10.1109/icassp39728.2021.9413610","title":"Multi-Directional Convolution Networks with Spatial-Temporal Feature Pyramid Module for Action Recognition","display_name":"Multi-Directional Convolution Networks with Spatial-Temporal Feature Pyramid Module for Action Recognition","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3162041249","doi":"https://doi.org/10.1109/icassp39728.2021.9413610","mag":"3162041249"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9413610","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413610","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068318066","display_name":"Bohong Yang","orcid":"https://orcid.org/0000-0001-7284-688X"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bohong Yang","raw_affiliation_strings":["Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100371712","display_name":"Zijian Wang","orcid":"https://orcid.org/0000-0003-0574-0342"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zijian Wang","raw_affiliation_strings":["Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031121165","display_name":"Wu Ran","orcid":"https://orcid.org/0000-0001-8478-0750"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wu Ran","raw_affiliation_strings":["Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070355771","display_name":"Hong Lu","orcid":"https://orcid.org/0000-0002-4572-2854"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Lu","raw_affiliation_strings":["Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100728316","display_name":"Yi\u2010Ping Phoebe Chen","orcid":"https://orcid.org/0000-0002-4122-3767"},"institutions":[{"id":"https://openalex.org/I196829312","display_name":"La Trobe University","ror":"https://ror.org/01rxfrp27","country_code":"AU","type":"education","lineage":["https://openalex.org/I196829312"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yi-Ping Phoebe Chen","raw_affiliation_strings":["La Trobe University, Melbourne, Australia"],"affiliations":[{"raw_affiliation_string":"La Trobe University, Melbourne, Australia","institution_ids":["https://openalex.org/I196829312"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5068318066"],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04767974,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2440","last_page":"2444"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.7416102290153503},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.727998673915863},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6543627977371216},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6507104635238647},{"id":"https://openalex.org/keywords/pyramid","display_name":"Pyramid (geometry)","score":0.6244299411773682},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5972853899002075},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.481808602809906},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.44628801941871643},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.41115567088127136},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3472940921783447},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15774688124656677},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.12220785021781921}],"concepts":[{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.7416102290153503},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.727998673915863},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6543627977371216},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6507104635238647},{"id":"https://openalex.org/C142575187","wikidata":"https://www.wikidata.org/wiki/Q3358290","display_name":"Pyramid (geometry)","level":2,"score":0.6244299411773682},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5972853899002075},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.481808602809906},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.44628801941871643},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.41115567088127136},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3472940921783447},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15774688124656677},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.12220785021781921},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9413610","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413610","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W1536680647","https://openalex.org/W1944615693","https://openalex.org/W2016053056","https://openalex.org/W2086842362","https://openalex.org/W2342662179","https://openalex.org/W2424778531","https://openalex.org/W2508429489","https://openalex.org/W2565639579","https://openalex.org/W2601564443","https://openalex.org/W2618799552","https://openalex.org/W2625366777","https://openalex.org/W2891446678","https://openalex.org/W2963150697","https://openalex.org/W2963155035","https://openalex.org/W2963524571","https://openalex.org/W2964191259","https://openalex.org/W2990503944","https://openalex.org/W3024963902","https://openalex.org/W3035303837","https://openalex.org/W3035413240","https://openalex.org/W3035523846","https://openalex.org/W6755013148"],"related_works":["https://openalex.org/W4249847449","https://openalex.org/W44395729","https://openalex.org/W2765338038","https://openalex.org/W1496225612","https://openalex.org/W1566843515","https://openalex.org/W4292850564","https://openalex.org/W1523500768","https://openalex.org/W4303411729","https://openalex.org/W2965803933","https://openalex.org/W4211202157"],"abstract_inverted_index":{"Recent":[0],"attempts":[1],"show":[2,100],"that":[3,101],"factorizing":[4],"3D":[5],"convolutional":[6],"filters":[7],"into":[8],"separate":[9],"spatial":[10,42,88],"and":[11,51,69,121],"temporal":[12,22,27,75],"components":[13],"brings":[14],"impressive":[15],"improvement":[16],"in":[17,90,93],"action":[18,114],"recognition.":[19],"However,":[20],"traditional":[21,73],"convolution":[23],"operating":[24],"along":[25,59],"the":[26,34,66,72,80,102],"dimension":[28],"will":[29],"aggregate":[30],"unrelated":[31],"features,":[32],"since":[33],"feature":[35],"maps":[36],"of":[37],"fast-moving":[38],"objects":[39],"have":[40],"shifted":[41],"positions.":[43],"In":[44],"this":[45],"paper,":[46],"we":[47,78],"propose":[48,79],"a":[49,94],"novel":[50],"effective":[52],"Multi-Directional":[53],"Convolution":[54],"(MDConv),":[55],"which":[56,104],"extracts":[57],"features":[58],"different":[60,91],"spatial-temporal":[61],"orientations.":[62],"Especially,":[63],"MDConv":[64,107],"has":[65],"same":[67],"FLOPs":[68],"parameters":[70],"as":[71,118],"1D":[74],"convolution.":[76],"Also,":[77],"Spatial-Temporal":[81],"Feature":[82],"Pyramid":[83],"Module":[84],"(STFPM)":[85],"to":[86],"fuse":[87],"semantics":[89],"scales":[92],"light-weight":[95],"way.":[96],"Our":[97],"extensive":[98],"experiments":[99],"models":[103],"integrate":[105],"with":[106],"achieve":[108],"better":[109],"accuracy":[110],"on":[111],"several":[112],"large-scale":[113],"recognition":[115],"benchmarks":[116],"such":[117],"Kinetics,":[119],"AVA":[120],"Something-Something":[122],"V1&V2":[123],"datasets.":[124]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
