{"id":"https://openalex.org/W7110870058","doi":"https://doi.org/10.48550/arxiv.2512.07379","title":"Enhancing Small Object Detection with YOLO: A Novel Framework for Improved Accuracy and Efficiency","display_name":"Enhancing Small Object Detection with YOLO: A Novel Framework for Improved Accuracy and Efficiency","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W7110870058","doi":"https://doi.org/10.48550/arxiv.2512.07379"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2512.07379","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.07379","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2512.07379","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Moghadami, Mahila","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moghadami, Mahila","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Keyvanrad, Mohammad Ali","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Keyvanrad, Mohammad Ali","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Sabaghian, Melika","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sabaghian, Melika","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9545999765396118,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9545999765396118,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.005799999926239252,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.003800000064074993,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.6978999972343445},{"id":"https://openalex.org/keywords/aerial-image","display_name":"Aerial image","score":0.6050000190734863},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5993000268936157},{"id":"https://openalex.org/keywords/sliding-window-protocol","display_name":"Sliding window protocol","score":0.5756999850273132},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5674999952316284},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5178999900817871},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.4982999861240387},{"id":"https://openalex.org/keywords/window","display_name":"Window (computing)","score":0.44690001010894775}],"concepts":[{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.6978999972343445},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.67330002784729},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6717000007629395},{"id":"https://openalex.org/C2776429412","wikidata":"https://www.wikidata.org/wiki/Q4688011","display_name":"Aerial image","level":3,"score":0.6050000190734863},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5993000268936157},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5806000232696533},{"id":"https://openalex.org/C102392041","wikidata":"https://www.wikidata.org/wiki/Q592860","display_name":"Sliding window protocol","level":3,"score":0.5756999850273132},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5674999952316284},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5178999900817871},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.4982999861240387},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.44690001010894775},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3950999975204468},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3912000060081482},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.3521000146865845},{"id":"https://openalex.org/C203595873","wikidata":"https://www.wikidata.org/wiki/Q25389927","display_name":"Change detection","level":2,"score":0.34369999170303345},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.31839999556541443},{"id":"https://openalex.org/C13558536","wikidata":"https://www.wikidata.org/wiki/Q785116","display_name":"Cropping","level":3,"score":0.3158999979496002},{"id":"https://openalex.org/C126422989","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature detection (computer vision)","level":4,"score":0.3100000023841858},{"id":"https://openalex.org/C135598885","wikidata":"https://www.wikidata.org/wiki/Q1366302","display_name":"Row","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.2964000105857849},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2791999876499176},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2685000002384186},{"id":"https://openalex.org/C2987819851","wikidata":"https://www.wikidata.org/wiki/Q191839","display_name":"Aerial imagery","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2512.07379","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.07379","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2512.07379","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.07379","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.5749779343605042,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0,250],"paper":[1],"investigates":[2],"and":[3,27,39,46,62,89,99,105,143,146,174],"develops":[4],"methods":[5],"for":[6,16,67,70,131,170],"detecting":[7,17],"small":[8,18,71,92,153],"objects":[9,19],"in":[10,20,59,91,101,128,137,186,259],"large-scale":[11,172],"aerial":[12,21,57],"images.":[13],"Current":[14],"approaches":[15],"images":[22],"often":[23],"involve":[24],"image":[25,181],"cropping":[26,38,97],"modifications":[28],"to":[29,86,140,151,232,245,263],"detector":[30,231],"network":[31],"architectures.":[32],"Techniques":[33],"such":[34],"as":[35],"sliding":[36,102],"window":[37,103],"architectural":[40,110],"enhancements,":[41],"including":[42,123],"higher-resolution":[43],"feature":[44,125,132],"maps":[45],"attention":[47],"mechanisms,":[48],"are":[49],"commonly":[50],"employed.":[51],"Given":[52],"the":[53,65,82,119,129,138,166,196,208,213,218,222,229,235],"growing":[54],"importance":[55],"of":[56,165,225,237],"imagery":[58],"various":[60],"critical":[61],"industrial":[63],"applications,":[64],"need":[66],"robust":[68],"frameworks":[69,169],"object":[72,93,154],"detection":[73,94,155,202],"becomes":[74],"imperative.":[75],"To":[76],"address":[77],"this":[78,246],"need,":[79],"we":[80,112,158],"adopted":[81],"base":[83,120,223],"SW-YOLO":[84],"approach":[85],"enhance":[87],"speed":[88],"accuracy":[90,193,216,224,236,260],"by":[95,117,203,228],"refining":[96],"dimensions":[98],"overlap":[100],"usage":[104],"subsequently":[106],"enhanced":[107],"it":[108],"through":[109],"modifications.":[111],"propose":[113],"a":[114,148,204,253],"novel":[115],"model":[116,121,190,211],"modifying":[118],"architecture,":[122],"advanced":[124],"extraction":[126],"modules":[127],"neck":[130],"map":[133],"enhancement,":[134],"integrating":[135],"CBAM":[136],"backbone":[139],"preserve":[141],"spatial":[142],"channel":[144],"information,":[145],"introducing":[147],"new":[149],"head":[150],"boost":[152],"accuracy.":[156,187],"Finally,":[157],"compared":[159],"our":[160],"method":[161,243],"with":[162],"SAHI,":[163],"one":[164],"most":[167],"powerful":[168],"processing":[171],"images,":[173],"CZDet,":[175,238],"which":[176,239],"is":[177,240,248],"also":[178],"based":[179],"on":[180,195,217],"cropping,":[182],"achieving":[183,256],"significant":[184,192,254],"improvements":[185],"The":[188],"proposed":[189,210],"achieves":[191],"gains":[194],"VisDrone2019":[197,219],"dataset,":[198,247],"outperforming":[199],"baseline":[200],"YOLOv5L":[201,230],"substantial":[205],"margin.":[206],"Specifically,":[207],"final":[209],"elevates":[212],"mAP":[214],".5.5":[215],"dataset":[220],"from":[221,261],"35.5":[226,262],"achieved":[227],"61.2.":[233,264],"Notably,":[234],"another":[241],"classic":[242],"applied":[244],"58.36.":[249],"research":[251],"demonstrates":[252],"improvement,":[255],"an":[257],"increase":[258]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-10T00:00:00"}
