{"id":"https://openalex.org/W2947963855","doi":"https://doi.org/10.7916/d8-srkz-t696","title":"Deep Learning for Action Understanding in Video","display_name":"Deep Learning for Action Understanding in Video","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2947963855","doi":"https://doi.org/10.7916/d8-srkz-t696","mag":"2947963855"},"language":"en","primary_location":{"id":"mag:2947963855","is_oa":false,"landing_page_url":"https://academiccommons.columbia.edu/doi/10.7916/d8-qx68-n412/download","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.7916/d8-srkz-t696","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112513463","display_name":"Zheng Shou","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Shou, Zheng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5112513463"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04193023,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.576339602470398},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.40751737356185913},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3652766942977905},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.33906853199005127}],"concepts":[{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.576339602470398},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.40751737356185913},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3652766942977905},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.33906853199005127},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"mag:2947963855","is_oa":false,"landing_page_url":"https://academiccommons.columbia.edu/doi/10.7916/d8-qx68-n412/download","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null},{"id":"doi:10.7916/d8-srkz-t696","is_oa":true,"landing_page_url":"https://doi.org/10.7916/d8-srkz-t696","pdf_url":null,"source":{"id":"https://openalex.org/S4306402601","display_name":"Columbia Academic Commons (Columbia University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I78577930","host_organization_name":"Columbia University","host_organization_lineage":["https://openalex.org/I78577930"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.7916/d8-srkz-t696","is_oa":true,"landing_page_url":"https://doi.org/10.7916/d8-srkz-t696","pdf_url":null,"source":{"id":"https://openalex.org/S4306402601","display_name":"Columbia Academic Commons (Columbia University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I78577930","host_organization_name":"Columbia University","host_organization_lineage":["https://openalex.org/I78577930"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":145,"referenced_works":["https://openalex.org/W24089286","https://openalex.org/W66452226","https://openalex.org/W225975350","https://openalex.org/W243985932","https://openalex.org/W639708223","https://openalex.org/W1522734439","https://openalex.org/W1536680647","https://openalex.org/W1551696353","https://openalex.org/W1567708943","https://openalex.org/W1578066333","https://openalex.org/W1578985305","https://openalex.org/W1599058448","https://openalex.org/W1606858007","https://openalex.org/W1686810756","https://openalex.org/W1745334888","https://openalex.org/W1797109199","https://openalex.org/W1849277567","https://openalex.org/W1903029394","https://openalex.org/W1905882502","https://openalex.org/W1923332106","https://openalex.org/W1927052826","https://openalex.org/W1945129080","https://openalex.org/W1947481528","https://openalex.org/W1950136256","https://openalex.org/W1966385142","https://openalex.org/W1977991519","https://openalex.org/W1983364832","https://openalex.org/W1983705368","https://openalex.org/W2016776918","https://openalex.org/W2018068650","https://openalex.org/W2063438554","https://openalex.org/W2084341401","https://openalex.org/W2095242101","https://openalex.org/W2098339052","https://openalex.org/W2099471712","https://openalex.org/W2100771357","https://openalex.org/W2102605133","https://openalex.org/W2105101328","https://openalex.org/W2109200236","https://openalex.org/W2110119381","https://openalex.org/W2115907784","https://openalex.org/W2117223204","https://openalex.org/W2118527252","https://openalex.org/W2126579184","https://openalex.org/W2142194269","https://openalex.org/W2147615062","https://openalex.org/W2150354123","https://openalex.org/W2155893237","https://openalex.org/W2156303437","https://openalex.org/W2173520492","https://openalex.org/W2175354415","https://openalex.org/W2178768799","https://openalex.org/W2185953016","https://openalex.org/W2187772033","https://openalex.org/W2194775991","https://openalex.org/W2220963900","https://openalex.org/W2221298318","https://openalex.org/W2221898772","https://openalex.org/W2293078015","https://openalex.org/W2295107390","https://openalex.org/W2308045930","https://openalex.org/W2337252826","https://openalex.org/W2342437993","https://openalex.org/W2342662179","https://openalex.org/W2342776425","https://openalex.org/W2398000642","https://openalex.org/W2412782625","https://openalex.org/W2424778531","https://openalex.org/W2460134573","https://openalex.org/W2463824207","https://openalex.org/W2471143248","https://openalex.org/W2472970127","https://openalex.org/W2473415337","https://openalex.org/W2477205648","https://openalex.org/W2507009361","https://openalex.org/W2519284461","https://openalex.org/W2519328139","https://openalex.org/W2529163075","https://openalex.org/W2548275288","https://openalex.org/W2548527721","https://openalex.org/W2552414813","https://openalex.org/W2559265498","https://openalex.org/W2566933410","https://openalex.org/W2570343428","https://openalex.org/W2585747585","https://openalex.org/W2593722617","https://openalex.org/W2593768305","https://openalex.org/W2597958930","https://openalex.org/W2600081845","https://openalex.org/W2601113512","https://openalex.org/W2604113307","https://openalex.org/W2605300166","https://openalex.org/W2606461407","https://openalex.org/W2606831796","https://openalex.org/W2607119937","https://openalex.org/W2607566495","https://openalex.org/W2647925446","https://openalex.org/W2738136547","https://openalex.org/W2738853914","https://openalex.org/W2739251211","https://openalex.org/W2746726611","https://openalex.org/W2755876276","https://openalex.org/W2773514261","https://openalex.org/W2776207810","https://openalex.org/W2782776028","https://openalex.org/W2806331055","https://openalex.org/W2893390896","https://openalex.org/W2894668550","https://openalex.org/W2895240652","https://openalex.org/W2949117887","https://openalex.org/W2949847866","https://openalex.org/W2962793481","https://openalex.org/W2962876901","https://openalex.org/W2963045696","https://openalex.org/W2963073614","https://openalex.org/W2963125871","https://openalex.org/W2963155035","https://openalex.org/W2963170156","https://openalex.org/W2963216700","https://openalex.org/W2963247196","https://openalex.org/W2963321993","https://openalex.org/W2963370182","https://openalex.org/W2963373786","https://openalex.org/W2963391479","https://openalex.org/W2963403868","https://openalex.org/W2963420272","https://openalex.org/W2963446712","https://openalex.org/W2963470893","https://openalex.org/W2963524571","https://openalex.org/W2963603913","https://openalex.org/W2963670239","https://openalex.org/W2963720581","https://openalex.org/W2963795442","https://openalex.org/W2963823258","https://openalex.org/W2963840672","https://openalex.org/W2963881378","https://openalex.org/W2963980128","https://openalex.org/W2964008341","https://openalex.org/W2964089981","https://openalex.org/W2964107628","https://openalex.org/W2964214371","https://openalex.org/W2964216549","https://openalex.org/W2964288706","https://openalex.org/W3100481960","https://openalex.org/W3106250896"],"related_works":["https://openalex.org/W2976027342","https://openalex.org/W2979789540","https://openalex.org/W2982421428","https://openalex.org/W3097372983","https://openalex.org/W2953149322","https://openalex.org/W2955004171","https://openalex.org/W2569451645","https://openalex.org/W3186284970","https://openalex.org/W2289645343","https://openalex.org/W3003140480","https://openalex.org/W2974318207","https://openalex.org/W2946810267","https://openalex.org/W3042172445","https://openalex.org/W3207853011","https://openalex.org/W2968986469","https://openalex.org/W3033974463","https://openalex.org/W2990314295","https://openalex.org/W3037363983","https://openalex.org/W3200455234","https://openalex.org/W2936306707"],"abstract_inverted_index":{"Action":[0,109,412,562,661],"understanding":[1,32,46,74,91,123,662,670,711,758,817,981,1026,1034],"is":[2,11,167,193,292,467,520,583,587,733,751,809],"key":[3,730],"to":[4,151,161,169,263,281,326,341,354,381,400,407,507,522,537,545,568,597,610,629,654,704,707,718,752,766,811,842,846,855,895,973,1016],"automatically":[5],"analyzing":[6],"video":[7,212,581,684,829,867,911,940,979,1000,1024,1036],"content":[8],"and":[9,148,186,219,271,308,322,332,346,404,429,448,463,513,539,644,649,699,763,832,872,886,905,921,944,982,1003,1027],"thus":[10,149],"important":[12,81,168,588,781,857],"for":[13,44,72,90,104,134,209,267,359,383,392,756,819,858,876,978,1031],"many":[14,590],"real-world":[15],"applications":[16,591],"such":[17,495,592,988],"as":[18,196,265,295,593,747,989],"autonomous":[19],"driving":[20],"car,":[21],"robot-assisted":[22],"care,":[23],"etc.":[24],"Therefore,":[25],"in":[26,57,75,84,114,118,175,320,324,414,436,565,589,614,663,692,695,712,716,743,759,771,783,791,865,908,985,1007],"the":[27,37,53,97,135,144,197,210,216,268,277,296,347,360,370,384,393,415,443,457,468,475,484,492,510,517,523,548,571,576,612,636,656,664,673,682,713,724,744,760,768,772,785,792,826,844,862,909,934,956,964,1018,1021,1029],"computer":[28],"vision":[29],"field,":[30],"action":[31,45,73,122,137,157,184,188,191,199,207,224,260,284,290,298,362,395,460,480,572,582,642,647,669,710,726,757,816,859,880,915,980,1025],"has":[33,67,1013],"been":[34,853],"one":[35],"of":[36,99,156,222,238,422,427,445,478,494,550,561,658,697,828,924,952,996,1020,1023,1035],"fundamental":[38],"research":[39,82,731,1012],"topics.":[40],"Most":[41],"conventional":[42],"methods":[43,89,103,609,671,942,957],"are":[47,132,432,868,971],"based":[48,88,121,623,900,941],"on":[49,96,126,377,402,409,456,533,541,575,624,901],"hand-crafted":[50],"features.":[51],"Like":[52],"recent":[54],"advances":[55],"seen":[56],"image":[58,62,687],"classification,":[59],"object":[60],"detection,":[61],"captioning,":[63],"etc,":[64],"deep":[65,86,101,119,423],"learning":[66,87,102,120,424],"become":[68],"a":[69,244,255,273,311,337,496,502,555,579,813,834,884,922],"popular":[70],"approach":[71],"video.":[76,1010],"However,":[77],"there":[78],"remain":[79],"several":[80],"challenges":[83,613,967],"developing":[85,734,784],"actions.":[92],"This":[93,689],"thesis":[94],"focuses":[95,125],"development":[98],"effective":[100,232,735,837],"solving":[105],"three":[106,239,607,914,965],"major":[107,966],"challenges.":[108],"detection":[110,185,192,291,413,727],"at":[111,172,344],"fine":[112],"granularities":[113,174],"time:":[115],"Previous":[116],"work":[117],"mainly":[124],"exploring":[127],"various":[128,986],"backbone":[129,736,786],"networks":[130,737,787],"that":[131,250,471,516,738,788,892,928,958],"designed":[133],"video-level":[136,485],"classification":[138,256,261,279],"task.":[139,300,364],"These":[140],"did":[141],"not":[142,204,367,433,873],"explore":[143],"fine-grained":[145],"temporal":[146,198,333,361,394,459,476,637,991],"characteristics":[147],"failed":[150],"produce":[152],"temporally":[153],"precise":[154,356],"estimation":[155],"boundaries.":[158],"In":[159,177,286,438,775],"order":[160,717],"understand":[162],"actions":[163,171,343],"more":[164,975],"comprehensively,":[165],"it":[166,808],"detect":[170,355],"finer":[173],"time.":[176],"Part":[178,439,776],"I,":[179],"we":[180,229,441,454,500,514,553,605,778,926,970],"study":[181],"both":[182,329],"segment-level":[183,511],"frame-level":[187,289,345,348],"detection.":[189],"Segment-level":[190],"usually":[194],"formulated":[195,294],"localization":[200,269,274,363,396,461],"task,":[201,387],"which":[202,316,431,466],"requires":[203],"only":[205,368,483],"recognizing":[206],"categories":[208],"whole":[211],"but":[213,388,807,861],"also":[214,389],"localizing":[215],"start":[217,573,643],"time":[218,221,325,574],"end":[220],"each":[223,283,479,820],"instance.":[225,285],"To":[226,490],"this":[227],"end,":[228],"propose":[230,464,606],"an":[231],"multi-stage":[233],"framework":[234,470,889],"called":[235,558,890],"Segment-CNN":[236],"consisting":[237],"segment-based":[240],"3D":[241],"ConvNets:":[242],"(1)":[243,618,990],"proposal":[245],"network":[246,257,275,280,340],"identifies":[247],"candidate":[248],"segments":[249],"may":[251,690,798],"contain":[252],"actions;":[253],"(2)":[254,633,993],"learns":[258],"one-vs-all":[259],"model":[262,328,818],"serve":[264],"initialization":[266],"network;":[270],"(3)":[272,650,998],"fine-tunes":[276],"learned":[278],"localize":[282],"another":[287],"approach,":[288],"effectively":[293],"per-frame":[297,385],"labeling":[299,386],"We":[301,335,823,882],"combine":[302],"two":[303,780,950],"reverse":[304],"operations":[305],"(i.e.":[306],"convolution":[307],"deconvolution)":[309],"into":[310,685],"joint":[312],"Convolutional-De-Convolutional":[313],"(CDC)":[314],"filter,":[315],"simultaneously":[317],"conducts":[318],"downsampling":[319],"space":[321],"upsampling":[323],"jointly":[327],"high-level":[330],"semantics":[331],"dynamics.":[334],"design":[336,501],"novel":[338,503,556,608,885],"CDC":[339],"predict":[342,474,896],"predictions":[349],"can":[350,472,739,893,931],"be":[351,799,856],"further":[352],"used":[353],"segment":[357],"boundary":[358,477,497,525],"Our":[365,528,749,1011],"method":[366,529,841],"improves":[369,390,531],"state-of-the-art":[371,725,938],"mean":[372],"Average":[373],"Precision":[374],"(mAP)":[375],"result":[376,691],"THUMOS\u201914":[378,403],"from":[379,398,405,535,543],"41.3%":[380],"44.4%":[382],"mAP":[391,532,540],"task":[397,462,557],"19.0%":[399],"23.3%":[401],"16.4%":[406],"23.8%":[408],"ActivityNet":[410,542],"v1.3.":[411],"constrained":[416],"scenarios:":[417],"The":[418,667,729],"usual":[419],"training":[420,493,615,659,838],"process":[421],"models":[425,754,977],"consists":[426],"supervision":[428,447,512],"data,":[430,552],"always":[434],"available":[435,487],"reality.":[437],"II,":[440],"consider":[442],"scenarios":[444],"incomplete":[446,449,452,551],"data.":[450,660],"For":[451,547],"supervision,":[453,997],"focus":[455],"weakly-supervised":[458],"AutoLoc":[465],"first":[469,680],"directly":[473,708,740,877],"instance":[481],"with":[482,943],"annotations":[486],"during":[488],"training.":[489],"enable":[491,569],"prediction":[498,526],"model,":[499],"Outer-Inner-Contrastive":[504],"(OIC)":[505],"loss":[506,519],"help":[508],"discover":[509],"prove":[515],"OIC":[518],"differentiable":[521],"underlying":[524],"model.":[527],"significantly":[530,932,1015],"THUMOS14":[534],"13.7%":[536],"21.2%":[538],"7.4%":[544],"27.3%.":[546],"scenario":[549],"formulate":[554],"Online":[559],"Detection":[560],"Start":[563],"(ODAS)":[564],"streaming":[566],"videos":[567,797],"detecting":[570],"fly":[577],"when":[578],"live":[580,999],"just":[584],"starting.":[585],"ODAS":[586,616],"early":[594],"alert":[595],"generation":[596,622],"allow":[598],"timely":[599],"security":[600],"or":[601,804],"emergency":[602],"response.":[603],"Specifically,":[604],"address":[611,779],"models:":[617],"hard":[619],"negative":[620],"samples":[621],"Generative":[625],"Adversarial":[626],"Network":[627],"(GAN)":[628],"distinguish":[630],"ambiguous":[631],"background,":[632],"explicitly":[634],"modeling":[635],"consistency":[638],"between":[639,937],"data":[640,645,742,839],"around":[641],"succeeding":[646],"start,":[648],"adaptive":[651],"sampling":[652],"strategy":[653],"handle":[655],"scarcity":[657],"compressed":[665,683,714,745,773,793,796,866,910,939,1009],"domain:":[666],"mainstream":[668],"including":[672],"aforementioned":[674],"techniques":[675],"developed":[676,755],"by":[677,801],"us":[678],"require":[679],"decoding":[681],"RGB":[686],"frames.":[688],"significant":[693],"cost":[694],"terms":[696],"storage":[698],"computation.":[700],"Recently,":[701],"researchers":[702],"started":[703],"investigate":[705],"how":[706],"perform":[709],"domain":[715,746,762],"achieve":[719],"high":[720],"efficiency":[721,1006],"while":[722,948],"maintaining":[723],"accuracy.":[728],"challenge":[732],"take":[741,753],"input.":[748],"baseline":[750],"decoded":[761],"adapt":[764],"them":[765],"attack":[767],"same":[769],"tasks":[770],"domain.":[774,794],"III,":[777],"issues":[782],"exclusively":[789],"operate":[790],"First,":[795],"produced":[800],"different":[802,814,821],"encoders":[803],"encoding":[805],"parameters,":[806],"impractical":[810],"train":[812],"compressed-domain":[815],"format.":[822],"experimentally":[824],"analyze":[825],"effect":[827],"encoder":[830,847],"variation":[831],"develop":[833,883,974],"simple":[835],"yet":[836],"preparation":[840],"alleviate":[843],"sensitivity":[845],"variation.":[848],"Second,":[849],"motion":[850,863,898,903],"cues":[851,899],"have":[852],"shown":[854],"understanding,":[860],"vectors":[864,904],"often":[869],"very":[870],"noisy":[871,902],"discriminative":[874,897],"enough":[875],"performing":[878],"accurate":[879],"understanding.":[881],"highly":[887],"efficient":[888],"DMC-Net":[891,930],"learn":[894],"residual":[906],"errors":[907],"streams.":[912],"On":[913],"recognition":[916],"benchmarks,":[917],"namely":[918],"HMDB-51,":[919],"UCF101":[920],"subset":[923],"Kinetics,":[925],"demonstrate":[927],"our":[929],"shorten":[933],"performance":[935,984],"gap":[936],"without":[945],"optical":[946,960],"flow,":[947],"being":[949],"orders":[951],"magnitude":[953],"faster":[954],"than":[955],"use":[959],"flow.":[961],"By":[962],"addressing":[963],"mentioned":[968],"above,":[969],"able":[972],"robust":[976],"improve":[983],"dimensions,":[987],"precision,":[992],"required":[994],"levels":[995],"analysis":[1001],"ability,":[1002],"finally":[1004],"(4)":[1005],"processing":[1008],"contributed":[1014],"advancing":[1017],"state":[1019],"art":[1022],"expanding":[1028],"foundation":[1030],"comprehensive":[1032],"semantic":[1033],"content.":[1037]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
