{"id":"https://openalex.org/W7125955069","doi":"https://doi.org/10.1109/smc58881.2025.11342594","title":"Enhancing Small Object Detection in Aerial Images via Transformer Scaling and Dynamic Fusion","display_name":"Enhancing Small Object Detection in Aerial Images via Transformer Scaling and Dynamic Fusion","publication_year":2025,"publication_date":"2025-10-05","ids":{"openalex":"https://openalex.org/W7125955069","doi":"https://doi.org/10.1109/smc58881.2025.11342594"},"language":null,"primary_location":{"id":"doi:10.1109/smc58881.2025.11342594","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342594","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124090248","display_name":"Kai Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kai Zheng","raw_affiliation_strings":["University of Electronic Science and Technology of China,Shenzhen Institute for Advanced Study,Shenzhen,China,518110"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China,Shenzhen Institute for Advanced Study,Shenzhen,China,518110","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124120582","display_name":"Jintao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jintao Li","raw_affiliation_strings":["University of Electronic Science and Technology of China,Shenzhen Institute for Advanced Study,Shenzhen,China,518110"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China,Shenzhen Institute for Advanced Study,Shenzhen,China,518110","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124129834","display_name":"Weixuan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weixuan Liu","raw_affiliation_strings":["University of Electronic Science and Technology of China,Shenzhen Institute for Advanced Study,Shenzhen,China,518110"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China,Shenzhen Institute for Advanced Study,Shenzhen,China,518110","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124057314","display_name":"Shui Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shui Yu","raw_affiliation_strings":["University of Electronic Science and Technology of China,Shenzhen Institute for Advanced Study,Shenzhen,China,518110"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China,Shenzhen Institute for Advanced Study,Shenzhen,China,518110","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124083011","display_name":"Yun Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yun Li","raw_affiliation_strings":["University of Electronic Science and Technology of China,Shenzhen Institute for Advanced Study,Shenzhen,China,518110"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China,Shenzhen Institute for Advanced Study,Shenzhen,China,518110","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5124090248"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.68773423,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5368","last_page":"5373"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8996999859809875,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8996999859809875,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10689","display_name":"Remote-Sensing Image Classification","score":0.01549999974668026,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12389","display_name":"Infrared Target Detection Methodologies","score":0.013100000098347664,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.6877999901771545},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.5692999958992004},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4699999988079071},{"id":"https://openalex.org/keywords/image-fusion","display_name":"Image fusion","score":0.42010000348091125},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.40220001339912415},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.3955000042915344},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.38449999690055847},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.37950000166893005},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.3695000112056732}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7577000260353088},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.6877999901771545},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6642000079154968},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.572700023651123},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.5692999958992004},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4699999988079071},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.42010000348091125},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.40220001339912415},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.3955000042915344},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.38449999690055847},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.37950000166893005},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3695000112056732},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.36890000104904175},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3553999960422516},{"id":"https://openalex.org/C2776429412","wikidata":"https://www.wikidata.org/wiki/Q4688011","display_name":"Aerial image","level":3,"score":0.3440000116825104},{"id":"https://openalex.org/C2780056265","wikidata":"https://www.wikidata.org/wiki/Q106239881","display_name":"High dynamic range","level":3,"score":0.337799996137619},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3262999951839447},{"id":"https://openalex.org/C2988656282","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Activity detection","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.28690001368522644},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2815999984741211},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27810001373291016},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C2987819851","wikidata":"https://www.wikidata.org/wiki/Q191839","display_name":"Aerial imagery","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C203595873","wikidata":"https://www.wikidata.org/wiki/Q25389927","display_name":"Change detection","level":2,"score":0.2596000134944916},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.25859999656677246},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc58881.2025.11342594","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342594","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5456574559211731,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"},{"score":0.4368760883808136,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W2008056655","https://openalex.org/W2097290407","https://openalex.org/W2102605133","https://openalex.org/W2109255472","https://openalex.org/W2161969291","https://openalex.org/W2327035729","https://openalex.org/W2963037989","https://openalex.org/W2963351448","https://openalex.org/W2991359031","https://openalex.org/W3016780015","https://openalex.org/W3034971973","https://openalex.org/W3137746351","https://openalex.org/W3160733999","https://openalex.org/W4294982749","https://openalex.org/W4315865367","https://openalex.org/W4323361448","https://openalex.org/W4379928223","https://openalex.org/W4389162407","https://openalex.org/W4389776171","https://openalex.org/W4390873076","https://openalex.org/W4391559729","https://openalex.org/W4391667303","https://openalex.org/W4392157793","https://openalex.org/W4392231788","https://openalex.org/W4392667193","https://openalex.org/W4393171204","https://openalex.org/W4396216689","https://openalex.org/W4396605538","https://openalex.org/W4398756379","https://openalex.org/W4402703046","https://openalex.org/W4407134472","https://openalex.org/W4407243430","https://openalex.org/W4407490772"],"related_works":[],"abstract_inverted_index":{"At":[0],"present,":[1],"accurate":[2],"detection":[3,33,44,171],"of":[4,163,172],"small":[5,173],"objects":[6,174],"in":[7,14,34,58,175],"an":[8],"aerial":[9],"imagery":[10],"remains":[11],"a":[12,35,42,62,88],"challenge":[13],"remote":[15,37,176],"sensing":[16,38,177],"due":[17],"to":[18,75,102],"limited":[19],"pixel":[20],"resolution,":[21],"background":[22],"clutter,":[23],"and":[24,51,80,86,98,133,143,154,169],"scale":[25],"variations.":[26],"To":[27],"address":[28],"these":[29],"issues":[30],"for":[31,167],"high-precision":[32],"complex":[36],"scene,":[39],"we":[40],"propose":[41],"novel":[43],"framework":[45],"based":[46],"on":[47,110,144],"RepViT":[48,65],"Dynamic":[49,63],"Fusion":[50],"YOLOv11,":[52],"termed":[53],"RDF-YOLO.":[54],"The":[55,179],"RDF-YOLO":[56,118,128,165],"brings":[57],"two":[59],"core":[60],"innovations:(1)":[61],"Scale":[64],"module":[66,91],"that":[67,92,116],"integrates":[68],"lightweight":[69],"Transformer":[70],"operations":[71],"into":[72],"the":[73,111,117,127,140,161,164],"backbone":[74],"enhance":[76],"global":[77],"context":[78],"modeling":[79],"semantic":[81],"discrimination":[82],"under":[83],"noisy":[84],"conditions,":[85],"(2)":[87],"dynamic":[89],"fusion":[90,100],"incorporates":[93],"spatially":[94],"aware":[95],"dilated":[96],"convolutions":[97],"channel-adaptive":[99],"strategies":[101],"enable":[103],"flexible,":[104],"scale-aware":[105],"feature":[106],"interaction.":[107],"Extensive":[108],"experiments":[109],"challenging":[112],"AI-TOD":[113],"dataset":[114],"show":[115],"outperforms":[119],"state-of-the-art":[120],"methods":[121],"by":[122,131,137],"substantial":[123],"margins.":[124],"In":[125],"particular,":[126],"improves":[129],"AP50:95":[130],"6.9%":[132],"AP<inf":[134,148,151,155],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[135,149,152,156],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">50</inf>":[136],"8.6%":[138],"over":[139],"YOLOv11":[141],"baseline":[142],"small-object":[145],"metrics,":[146],"including":[147],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">vt</inf>,":[150],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">t</inf>,":[153],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">s</inf>.":[157],"These":[158],"results":[159],"verify":[160],"effectiveness":[162],"architecture":[166],"robust":[168],"efficient":[170],"imagery.":[178],"source":[180],"code":[181],"is":[182],"available":[183],"at":[184],"https://github.com/AssiiKk/RDF-YOLO.":[185]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-01-29T00:00:00"}
