{"id":"https://openalex.org/W4399257636","doi":"https://doi.org/10.1145/3653804.3654608","title":"DroneGPT: Zero-shot Video Question Answering For Drones","display_name":"DroneGPT: Zero-shot Video Question Answering For Drones","publication_year":2024,"publication_date":"2024-01-19","ids":{"openalex":"https://openalex.org/W4399257636","doi":"https://doi.org/10.1145/3653804.3654608"},"language":"en","primary_location":{"id":"doi:10.1145/3653804.3654608","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3653804.3654608","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Computer Vision and Deep Learning","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055762601","display_name":"Hongjie Qiu","orcid":"https://orcid.org/0009-0009-6463-0726"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongjie Qiu","raw_affiliation_strings":["School of Computer Science, Hangzhou Dianzi University, China"],"raw_orcid":"https://orcid.org/0009-0009-6463-0726","affiliations":[{"raw_affiliation_string":"School of Computer Science, Hangzhou Dianzi University, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081070071","display_name":"Jinqiang Li","orcid":"https://orcid.org/0009-0007-3583-4602"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinqiang Li","raw_affiliation_strings":["School of Computer Science, Hangzhou Dianzi University, China"],"raw_orcid":"https://orcid.org/0009-0007-3583-4602","affiliations":[{"raw_affiliation_string":"School of Computer Science, Hangzhou Dianzi University, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Junhao Gan","orcid":"https://orcid.org/0009-0000-3047-183X"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junhao Gan","raw_affiliation_strings":["School of Computer Science, Hangzhou Dianzi University, China"],"raw_orcid":"https://orcid.org/0009-0000-3047-183X","affiliations":[{"raw_affiliation_string":"School of Computer Science, Hangzhou Dianzi University, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101812130","display_name":"Shuwen Zheng","orcid":"https://orcid.org/0009-0002-7332-1836"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuwen Zheng","raw_affiliation_strings":["School of Computer Science, Hangzhou Dianzi University, China"],"raw_orcid":"https://orcid.org/0009-0002-7332-1836","affiliations":[{"raw_affiliation_string":"School of Computer Science, Hangzhou Dianzi University, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051997091","display_name":"Liqi Yan","orcid":"https://orcid.org/0000-0002-7077-4947"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liqi Yan","raw_affiliation_strings":["School of Computer Science, Hangzhou Dianzi University, China"],"raw_orcid":"https://orcid.org/0000-0002-7077-4947","affiliations":[{"raw_affiliation_string":"School of Computer Science, Hangzhou Dianzi University, China","institution_ids":["https://openalex.org/I50760025"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5055762601"],"corresponding_institution_ids":["https://openalex.org/I50760025"],"apc_list":null,"apc_paid":null,"fwci":0.4762,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.61226807,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/drone","display_name":"Drone","score":0.9577318429946899},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8126720190048218},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.6578061580657959},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6360983848571777},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.6344663500785828},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.6019905209541321},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5963073372840881},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5547472834587097},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4361937344074249},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.422711044549942},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.41297468543052673},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.39016175270080566},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.3780372142791748},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3469089865684509},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16099563241004944}],"concepts":[{"id":"https://openalex.org/C59519942","wikidata":"https://www.wikidata.org/wiki/Q650665","display_name":"Drone","level":2,"score":0.9577318429946899},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8126720190048218},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.6578061580657959},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6360983848571777},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.6344663500785828},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.6019905209541321},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5963073372840881},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5547472834587097},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4361937344074249},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.422711044549942},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.41297468543052673},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39016175270080566},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3780372142791748},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3469089865684509},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16099563241004944},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3653804.3654608","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3653804.3654608","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Computer Vision and Deep Learning","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Climate action","score":0.47999998927116394,"id":"https://metadata.un.org/sdg/13"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2744822616","https://openalex.org/W2974563605","https://openalex.org/W2997344006","https://openalex.org/W3030163527","https://openalex.org/W3135367836","https://openalex.org/W3147415813","https://openalex.org/W3208645658","https://openalex.org/W4221146106","https://openalex.org/W4224951336","https://openalex.org/W4225323055","https://openalex.org/W4285118488","https://openalex.org/W4293732316","https://openalex.org/W4308241978","https://openalex.org/W4312747027","https://openalex.org/W4324128075","https://openalex.org/W6639102338","https://openalex.org/W6811476558","https://openalex.org/W6846754007"],"related_works":["https://openalex.org/W3157284875","https://openalex.org/W2259406085","https://openalex.org/W2099715052","https://openalex.org/W2147241511","https://openalex.org/W4226247999","https://openalex.org/W4213176082","https://openalex.org/W2187398150","https://openalex.org/W3209772662","https://openalex.org/W4200629926","https://openalex.org/W4220955952"],"abstract_inverted_index":{"With":[0],"the":[1,39,67,103,109,115],"continuous":[2],"development":[3],"and":[4,49,55,87,112],"popularization":[5],"of":[6,43,106,117],"drone":[7,18,57,93],"technology,":[8],"drones":[9,107],"are":[10],"widely":[11],"used":[12],"in":[13,17,66,108],"various":[14],"fields,":[15],"especially":[16],"video":[19,94,110],"applications.":[20],"We":[21,97],"propose":[22],"DroneGPT,":[23],"a":[24],"neural-symbolic":[25],"method":[26],"that":[27,99],"learns":[28],"VISPROG,":[29],"which":[30],"does":[31],"not":[32],"require":[33],"any":[34],"task-specific":[35],"training.":[36],"It":[37],"leverages":[38],"contextual":[40],"learning":[41],"ability":[42],"large":[44],"language":[45,62],"models":[46,75],"to":[47,76,91],"generate":[48],"execute":[50],"modular":[51],"programs,":[52],"solving":[53],"complex":[54],"compositional":[56],"vision":[58,74],"tasks":[59],"given":[60],"natural":[61],"instructions.":[63],"The":[64],"modules":[65],"program":[68],"can":[69,101],"call":[70],"several":[71],"ready-made":[72],"computer":[73],"achieve":[77,92],"object":[78],"detection,":[79],"or":[80],"write":[81],"image":[82],"processing":[83],"programs":[84],"by":[85],"themselves,":[86],"finally":[88],"connect":[89],"them":[90],"question":[95],"answering.":[96],"believe":[98],"DroneGPT":[100],"expand":[102],"task":[104],"scope":[105],"field":[111],"further":[113],"enrich":[114],"functions":[116],"contemporary":[118],"drones.":[119]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
