{"id":"https://openalex.org/W4416250180","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228159","title":"SOD-DETR: Small Object Detection with Attention Enhancement and Multi-scale Feature Fusion for UAV Imagery","display_name":"SOD-DETR: Small Object Detection with Attention Enhancement and Multi-scale Feature Fusion for UAV Imagery","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416250180","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228159"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228159","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228159","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101926670","display_name":"Xiaoping Zheng","orcid":"https://orcid.org/0000-0003-3271-3702"},"institutions":[{"id":"https://openalex.org/I3018263800","display_name":"Huzhou University","ror":"https://ror.org/04mvpxy20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3018263800"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinjie Zheng","raw_affiliation_strings":["Huzhou University,School of Information Engineering,Huzhou,China"],"affiliations":[{"raw_affiliation_string":"Huzhou University,School of Information Engineering,Huzhou,China","institution_ids":["https://openalex.org/I3018263800"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017693930","display_name":"Bin Shao","orcid":"https://orcid.org/0000-0001-8697-2174"},"institutions":[{"id":"https://openalex.org/I3018263800","display_name":"Huzhou University","ror":"https://ror.org/04mvpxy20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3018263800"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Shao","raw_affiliation_strings":["Huzhou University,School of Information Engineering,Huzhou,China"],"affiliations":[{"raw_affiliation_string":"Huzhou University,School of Information Engineering,Huzhou,China","institution_ids":["https://openalex.org/I3018263800"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101926670"],"corresponding_institution_ids":["https://openalex.org/I3018263800"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.3727475,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9344000220298767,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9344000220298767,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11133","display_name":"UAV Applications and Optimization","score":0.017400000244379044,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.007199999876320362,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.7979000210762024},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6392999887466431},{"id":"https://openalex.org/keywords/upsampling","display_name":"Upsampling","score":0.5843999981880188},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.53329998254776},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.5317999720573425},{"id":"https://openalex.org/keywords/pyramid","display_name":"Pyramid (geometry)","score":0.5293999910354614},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.430400013923645},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3765999972820282}],"concepts":[{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.7979000210762024},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7889999747276306},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7099000215530396},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6392999887466431},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5968000292778015},{"id":"https://openalex.org/C110384440","wikidata":"https://www.wikidata.org/wiki/Q1143270","display_name":"Upsampling","level":3,"score":0.5843999981880188},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.53329998254776},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.5317999720573425},{"id":"https://openalex.org/C142575187","wikidata":"https://www.wikidata.org/wiki/Q3358290","display_name":"Pyramid (geometry)","level":2,"score":0.5293999910354614},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.430400013923645},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3765999972820282},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3650999963283539},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.3321000039577484},{"id":"https://openalex.org/C126422989","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature detection (computer vision)","level":4,"score":0.30140000581741333},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.29339998960494995},{"id":"https://openalex.org/C2983787585","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature matching","level":3,"score":0.29120001196861267},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2856999933719635},{"id":"https://openalex.org/C182521987","wikidata":"https://www.wikidata.org/wiki/Q2493877","display_name":"Viola\u2013Jones object detection framework","level":5,"score":0.251800000667572},{"id":"https://openalex.org/C203595873","wikidata":"https://www.wikidata.org/wiki/Q25389927","display_name":"Change detection","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228159","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228159","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W2151103935","https://openalex.org/W2161969291","https://openalex.org/W2193145675","https://openalex.org/W2548197316","https://openalex.org/W2565639579","https://openalex.org/W2570343428","https://openalex.org/W2625219738","https://openalex.org/W2752825721","https://openalex.org/W2804436788","https://openalex.org/W2899607431","https://openalex.org/W2924873663","https://openalex.org/W2963037989","https://openalex.org/W2963179609","https://openalex.org/W2963315052","https://openalex.org/W2963813458","https://openalex.org/W2990230185","https://openalex.org/W3022917557","https://openalex.org/W3034971973","https://openalex.org/W3096609285","https://openalex.org/W3174873843","https://openalex.org/W3190335749","https://openalex.org/W3210586215","https://openalex.org/W4214627427","https://openalex.org/W4391307079","https://openalex.org/W4396585406","https://openalex.org/W4396680782","https://openalex.org/W4402754006"],"related_works":[],"abstract_inverted_index":{"To":[0],"address":[1,48],"challenges":[2],"in":[3,180],"UAV":[4,115,182],"small":[5,38,50,80,177],"object":[6,39,51,81,178],"detection":[7,26,40,179],"including":[8],"large":[9,73],"scale":[10,74],"variations,":[11,75],"insufficient":[12,49],"feature":[13,52,67,88,107],"representation,":[14,53],"and":[15,122,128,138,148,151,158],"loss":[16,100],"of":[17,93,160],"detail":[18],"information,":[19],"this":[20],"paper":[21],"proposes":[22],"a":[23,56,78,105,143,173],"novel":[24],"end-to-end":[25],"model":[27,35],"SOD-DETR.":[28],"Based":[29],"on":[30,112,124],"the":[31,34,64,113,156,166],"DETR":[32],"detector,":[33],"significantly":[36],"improves":[37,87],"performance":[41],"through":[42],"three":[43],"innovative":[44],"modules.":[45],"First,":[46],"to":[47,71,91,97],"we":[54,76,103],"design":[55],"hybrid":[57],"attention":[58],"enhancement":[59,83],"module":[60,109],"C2f-HAE":[61],"that":[62,86,118],"strengthens":[63],"backbone":[65],"network\u2019s":[66],"extraction":[68],"capability.":[69],"Second,":[70],"handle":[72],"introduce":[77],"multi-scale":[79],"perception":[82],"pyramid":[84],"MSOP":[85],"pyramid\u2019s":[89],"adaptability":[90],"objects":[92],"different":[94],"scales.":[95],"Finally,":[96],"mitigate":[98],"information":[99],"during":[101],"upsampling,":[102],"employ":[104],"content-adaptive":[106],"upsampling":[108],"CAFU.":[110],"Experiments":[111],"VisDrone2019":[114],"dataset":[116],"demonstrate":[117],"SOD-DETR":[119,171],"achieves":[120],"52.3%":[121],"32.4%":[123],"mAP<inf":[125,129],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[126,130],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">50</inf>":[127],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">50\u221295</inf>":[131],"metrics":[132],"respectively,":[133],"surpassing":[134],"existing":[135],"mainstream":[136],"one-stage":[137],"two-stage":[139],"detectors":[140],"while":[141],"achieving":[142],"good":[144],"balance":[145],"between":[146],"accuracy":[147],"speed.":[149],"Ablation":[150],"comparative":[152],"experiments":[153],"further":[154],"verify":[155],"effectiveness":[157],"complementarity":[159],"each":[161],"module,":[162],"as":[163,165],"well":[164],"model\u2019s":[167],"advancement.":[168],"The":[169],"proposed":[170],"provides":[172],"new":[174],"solution":[175],"for":[176],"complex":[181],"scenarios.":[183]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
