{"id":"https://openalex.org/W4405785515","doi":"https://doi.org/10.1109/iros58592.2024.10802074","title":"SCP: Soft Conditional Prompt Learning for Aerial Video Action Recognition","display_name":"SCP: Soft Conditional Prompt Learning for Aerial Video Action Recognition","publication_year":2024,"publication_date":"2024-10-14","ids":{"openalex":"https://openalex.org/W4405785515","doi":"https://doi.org/10.1109/iros58592.2024.10802074"},"language":"en","primary_location":{"id":"doi:10.1109/iros58592.2024.10802074","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10802074","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100709597","display_name":"Xijun Wang","orcid":"https://orcid.org/0000-0003-3504-9763"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xijun Wang","raw_affiliation_strings":["University of Maryland,Dept. of Computer Science,College Park,MD,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,Dept. of Computer Science,College Park,MD,USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083695837","display_name":"Ruiqi Xian","orcid":null},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruiqi Xian","raw_affiliation_strings":["University of Maryland,Dept. of Electrical and Computer Engineering,College Park,MD,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,Dept. of Electrical and Computer Engineering,College Park,MD,USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014003390","display_name":"Tianrui Guan","orcid":"https://orcid.org/0000-0002-6892-9778"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianrui Guan","raw_affiliation_strings":["University of Maryland,Dept. of Computer Science,College Park,MD,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,Dept. of Computer Science,College Park,MD,USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027505093","display_name":"Fuxiao Liu","orcid":"https://orcid.org/0000-0002-3078-0613"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fuxiao Liu","raw_affiliation_strings":["University of Maryland,Dept. of Computer Science,College Park,MD,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,Dept. of Computer Science,College Park,MD,USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004194238","display_name":"Dinesh Manocha","orcid":"https://orcid.org/0000-0001-7047-9801"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dinesh Manocha","raw_affiliation_strings":["University of Maryland,Dept. of Computer Science,College Park,MD,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,Dept. of Computer Science,College Park,MD,USA","institution_ids":["https://openalex.org/I66946132"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100709597"],"corresponding_institution_ids":["https://openalex.org/I66946132"],"apc_list":null,"apc_paid":null,"fwci":0.7151,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.73220476,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"10967","last_page":"10974"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7434360384941101},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.553613007068634},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5175106525421143},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5148026347160339},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.42080947756767273},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33576714992523193}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7434360384941101},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.553613007068634},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5175106525421143},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5148026347160339},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.42080947756767273},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33576714992523193},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros58592.2024.10802074","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10802074","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320316514","display_name":"Arm","ror":"https://ror.org/04mmhzs81"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W1744759976","https://openalex.org/W1944615693","https://openalex.org/W1966385142","https://openalex.org/W1983364832","https://openalex.org/W2016053056","https://openalex.org/W2608988379","https://openalex.org/W2625286981","https://openalex.org/W2799176631","https://openalex.org/W2954521024","https://openalex.org/W2963524571","https://openalex.org/W2970476646","https://openalex.org/W2990503944","https://openalex.org/W3009803092","https://openalex.org/W3034572008","https://openalex.org/W3035303837","https://openalex.org/W3044438666","https://openalex.org/W3089971466","https://openalex.org/W3095374178","https://openalex.org/W3098267758","https://openalex.org/W3100283070","https://openalex.org/W3108892828","https://openalex.org/W3108975329","https://openalex.org/W3126721948","https://openalex.org/W3138516171","https://openalex.org/W3166986030","https://openalex.org/W3173621652","https://openalex.org/W3185341429","https://openalex.org/W3198377975","https://openalex.org/W4205991051","https://openalex.org/W4214589115","https://openalex.org/W4214612132","https://openalex.org/W4214614183","https://openalex.org/W4226058394","https://openalex.org/W4247400036","https://openalex.org/W4309811444","https://openalex.org/W4311348313","https://openalex.org/W4312480274","https://openalex.org/W4312769131","https://openalex.org/W4312895250","https://openalex.org/W4312947882","https://openalex.org/W4383108418","https://openalex.org/W4383108457","https://openalex.org/W4390190667","https://openalex.org/W4390874575","https://openalex.org/W4393148572","https://openalex.org/W4394597954","https://openalex.org/W4394625712","https://openalex.org/W4401042875","https://openalex.org/W6682864246","https://openalex.org/W6778883912","https://openalex.org/W6846242362","https://openalex.org/W6854454714","https://openalex.org/W6854555012","https://openalex.org/W6859788467","https://openalex.org/W6873427608"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"We":[0,75,157,182],"present":[1,76],"a":[2,77,89,133,174],"new":[3],"learning":[4,17,119],"approach,":[5],"Soft":[6],"Conditional":[7],"Prompt":[8],"Learning":[9],"(SCP),":[10],"which":[11,148],"leverages":[12],"the":[13,29,36,40,48,72,100,104,114,140,168,187],"strengths":[14],"of":[15,31,91,150],"prompt":[16,80,92,127],"for":[18,51],"aerial":[19,141],"video":[20,96,142],"action":[21,30],"recognition.":[22],"Our":[23,55],"approach":[24,161],"is":[25],"designed":[26],"to":[27,70,84,166],"predict":[28],"each":[32],"agent":[33],"by":[34],"helping":[35],"models":[37,69],"focus":[38],"on":[39,139,162,179],"descriptions":[41],"or":[42],"instructions":[43],"associated":[44],"with":[45,103,152],"actions":[46],"in":[47],"input":[49],"videos":[50,165],"aerial/robot":[52],"visual":[53,64],"perception.":[54],"formulation":[56],"supports":[57],"various":[58],"prompts,":[59,62],"including":[60],"learnable":[61],"auxiliary":[63],"information,":[65],"and":[66,124,154,170,172],"large":[67],"vision":[68],"improve":[71],"recognition":[73],"performance.":[74],"soft":[78],"conditional":[79],"method":[81,185],"that":[82,112],"learns":[83],"dynamically":[85],"generate":[86],"prompts":[87,111],"from":[88],"pool":[90],"experts":[93,122],"under":[94],"different":[95],"inputs.":[97],"By":[98],"sharing":[99],"same":[101],"objective":[102],"task,":[105],"our":[106,160,184],"proposed":[107],"SCP":[108],"can":[109],"optimize":[110],"guide":[113],"model\u2019s":[115],"predictions":[116],"while":[117],"explicitly":[118],"input-invariant":[120],"(prompt":[121],"pool)":[123],"input-specific":[125],"(data-dependent)":[126],"knowledge.":[128],"In":[129],"practice,":[130],"we":[131],"observe":[132],"3.17":[134],"\u2212":[135,176],"10.2%":[136],"accuracy":[137],"improvement":[138,178],"datasets":[143],"(Okutama":[144],"[1],":[145],"NECDrone":[146],"[2]),":[147],"consist":[149],"scenes":[151],"single-agent":[153],"multi-agent":[155],"actions.":[156],"further":[158],"evaluate":[159],"ground":[163],"camera":[164],"verify":[167],"effectiveness":[169],"generalization":[171],"achieve":[173],"1.0":[175],"3.6%":[177],"SSV2":[180],"[3].":[181],"integrate":[183],"into":[186],"ROS2":[188],"as":[189],"well.":[190]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
