{"id":"https://openalex.org/W7137837283","doi":"https://doi.org/10.1609/aaai.v40i4.37270","title":"AerialMind: Towards Referring Multi-Object Tracking in UAV Scenarios","display_name":"AerialMind: Towards Referring Multi-Object Tracking in UAV Scenarios","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137837283","doi":"https://doi.org/10.1609/aaai.v40i4.37270"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i4.37270","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i4.37270","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37270/41232","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37270/41232","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070100004","display_name":"C. Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210162190","display_name":"China University of Petroleum, East China","ror":"https://ror.org/05gbn2817","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210162190"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chenglizhao Chen","raw_affiliation_strings":["Qingdao Institute of Software, College of Computer Science and Technology, China University of Petroleum (East China)\nShandong Key Laboratory of Intelligent Oil & Gas Industrial Software"],"affiliations":[{"raw_affiliation_string":"Qingdao Institute of Software, College of Computer Science and Technology, China University of Petroleum (East China)\nShandong Key Laboratory of Intelligent Oil & Gas Industrial Software","institution_ids":["https://openalex.org/I4210162190"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129678323","display_name":"Shaofeng Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210162190","display_name":"China University of Petroleum, East China","ror":"https://ror.org/05gbn2817","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210162190"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaofeng Liang","raw_affiliation_strings":["Qingdao Institute of Software, College of Computer Science and Technology, China University of Petroleum (East China)\nShandong Key Laboratory of Intelligent Oil & Gas Industrial Software"],"affiliations":[{"raw_affiliation_string":"Qingdao Institute of Software, College of Computer Science and Technology, China University of Petroleum (East China)\nShandong Key Laboratory of Intelligent Oil & Gas Industrial Software","institution_ids":["https://openalex.org/I4210162190"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129689336","display_name":"Runwei Guan","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Runwei Guan","raw_affiliation_strings":["Thrust of Artificial Intelligence, The Hong Kong University of Science and Technology (Guangzhou)"],"affiliations":[{"raw_affiliation_string":"Thrust of Artificial Intelligence, The Hong Kong University of Science and Technology (Guangzhou)","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042984551","display_name":"Xiaolou Sun","orcid":"https://orcid.org/0009-0007-3597-3872"},"institutions":[{"id":"https://openalex.org/I4210155350","display_name":"Purple Mountain Laboratories","ror":"https://ror.org/04zcbk583","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210155350"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaolou Sun","raw_affiliation_strings":["Purple Mountain Laboratories"],"affiliations":[{"raw_affiliation_string":"Purple Mountain Laboratories","institution_ids":["https://openalex.org/I4210155350"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047910725","display_name":"Haocheng Zhao","orcid":"https://orcid.org/0000-0001-8932-8106"},"institutions":[{"id":"https://openalex.org/I69356397","display_name":"Xi\u2019an Jiaotong-Liverpool University","ror":"https://ror.org/03zmrmn05","country_code":"CN","type":"education","lineage":["https://openalex.org/I69356397"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haocheng Zhao","raw_affiliation_strings":["School of Advanced Technology, Xi'an Jiaotong-Liverpool University"],"affiliations":[{"raw_affiliation_string":"School of Advanced Technology, Xi'an Jiaotong-Liverpool University","institution_ids":["https://openalex.org/I69356397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129667198","display_name":"Haiyun Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haiyun Jiang","raw_affiliation_strings":["School of Automation and Intelligent Sensing, Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"School of Automation and Intelligent Sensing, Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129702704","display_name":"Tao Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I86467917","display_name":"James Cook University","ror":"https://ror.org/04gsp2c11","country_code":"AU","type":"education","lineage":["https://openalex.org/I86467917"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Tao Huang","raw_affiliation_strings":["College of Science and Engineering, James Cook University"],"affiliations":[{"raw_affiliation_string":"College of Science and Engineering, James Cook University","institution_ids":["https://openalex.org/I86467917"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129679877","display_name":"Henghui Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Henghui Ding","raw_affiliation_strings":["Institute of Big Data, College of Computer Science and Artificial Intelligence, Fudan University"],"affiliations":[{"raw_affiliation_string":"Institute of Big Data, College of Computer Science and Artificial Intelligence, Fudan University","institution_ids":["https://openalex.org/I4210164862"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083874423","display_name":"Qing\u2010Long Han","orcid":"https://orcid.org/0000-0002-7207-0716"},"institutions":[{"id":"https://openalex.org/I57093077","display_name":"Swinburne University of Technology","ror":"https://ror.org/031rekg67","country_code":"AU","type":"education","lineage":["https://openalex.org/I57093077"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Qing-Long Han","raw_affiliation_strings":["School of Engineering, Swinburne University of Technology, Melbourne"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Swinburne University of Technology, Melbourne","institution_ids":["https://openalex.org/I57093077"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5070100004"],"corresponding_institution_ids":["https://openalex.org/I4210162190"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05746269,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"4","first_page":"2805","last_page":"2813"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5889999866485596,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5889999866485596,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.09549999982118607,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11133","display_name":"UAV Applications and Optimization","score":0.07720000296831131,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6675000190734863},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.46380001306533813},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.43290001153945923},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4221999943256378},{"id":"https://openalex.org/keywords/drone","display_name":"Drone","score":0.4196000099182129},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.40639999508857727},{"id":"https://openalex.org/keywords/expansive","display_name":"Expansive","score":0.39890000224113464},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.3853999972343445},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.3709999918937683}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6743999719619751},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6675000190734863},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.580299973487854},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.46380001306533813},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.43290001153945923},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4221999943256378},{"id":"https://openalex.org/C59519942","wikidata":"https://www.wikidata.org/wiki/Q650665","display_name":"Drone","level":2,"score":0.4196000099182129},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.40639999508857727},{"id":"https://openalex.org/C2780502288","wikidata":"https://www.wikidata.org/wiki/Q28838156","display_name":"Expansive","level":3,"score":0.39890000224113464},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3853999972343445},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3806999921798706},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.3709999918937683},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3582000136375427},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.3440000116825104},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.33869999647140503},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3273000121116638},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.3262999951839447},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3240000009536743},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.31859999895095825},{"id":"https://openalex.org/C54525549","wikidata":"https://www.wikidata.org/wiki/Q2553445","display_name":"Weaving","level":2,"score":0.3151000142097473},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3138999938964844},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3116999864578247},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.3109000027179718},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.30480000376701355},{"id":"https://openalex.org/C154586513","wikidata":"https://www.wikidata.org/wiki/Q4420972","display_name":"Tracking system","level":3,"score":0.2955999970436096},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.2838999927043915},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.2614000141620636},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2590000033378601},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2572000026702881},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i4.37270","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i4.37270","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37270/41232","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i4.37270","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i4.37270","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37270/41232","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7137837283.pdf","grobid_xml":"https://content.openalex.org/works/W7137837283.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Referring":[0],"Multi-Object":[1],"Tracking":[2],"(RMOT)":[3],"aims":[4,110],"to":[5,31,38,64,82,111],"achieve":[6],"precise":[7],"object":[8],"detection":[9],"and":[10,43,47,61,154,170],"tracking":[11,46],"through":[12],"natural":[13,92],"language":[14,93],"instructions,":[15],"representing":[16],"a":[17,145],"fundamental":[18],"capability":[19],"for":[20,75,86],"intelligent":[21,87],"robotic":[22],"systems.":[23],"However,":[24],"current":[25],"RMOT":[26,104],"research":[27,114],"remains":[28],"mostly":[29],"confined":[30],"ground-level":[32],"scenarios,":[33,108],"which":[34,78,109],"constrains":[35],"their":[36,57],"ability":[37],"capture":[39],"broad-scale":[40],"scene":[41],"contexts":[42],"perform":[44],"comprehensive":[45],"path":[48],"planning.":[49],"In":[50],"contrast,":[51],"Unmanned":[52],"Aerial":[53],"Vehicles":[54],"(UAVs)":[55],"leverage":[56],"expansive":[58],"aerial":[59,88],"perspectives":[60],"superior":[62],"maneuverability":[63],"enable":[65],"wide-area":[66],"surveillance.":[67],"Moreover,":[68],"UAVs":[69],"have":[70],"emerged":[71],"as":[72],"critical":[73],"platforms":[74],"Embodied":[76],"Intelligence,":[77],"has":[79],"given":[80],"rise":[81],"an":[83,122],"unprecedented":[84],"demand":[85],"systems":[89],"capable":[90],"of":[91,158,167,173],"interaction.":[94],"To":[95,116],"this":[96,113],"end,":[97],"we":[98,120,141],"introduce":[99],"AerialMind,":[100],"the":[101,156,164,171],"first":[102],"large-scale":[103],"benchmark":[105],"in":[106],"UAV":[107,159],"bridge":[112],"gap.":[115],"facilitate":[117],"its":[118],"construction,":[119],"develop":[121],"innovative":[123],"semi-automated":[124],"collaborative":[125],"agent-based":[126],"labeling":[127],"assistant":[128],"(COALA)":[129],"framework":[130],"that":[131,148],"significantly":[132],"reduces":[133],"labor":[134],"costs":[135],"while":[136],"maintaining":[137],"annotation":[138],"quality.":[139],"Furthermore,":[140],"propose":[142],"HawkEyeTrack":[143],"(HETrack),":[144],"novel":[146],"method":[147],"collaboratively":[149],"enhances":[150],"vision-language":[151],"representation":[152],"learning":[153],"improves":[155],"perception":[157],"scenarios.":[160],"Comprehensive":[161],"experiments":[162],"validated":[163],"challenging":[165],"nature":[166],"our":[168,174],"dataset":[169],"effectiveness":[172],"method.":[175]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
