{"id":"https://openalex.org/W7129296275","doi":"https://doi.org/10.1109/icipw68931.2025.11385893","title":"Enhanced Object Detection on Aerial Cityscapes Via Augmentation and Yolo Variants","display_name":"Enhanced Object Detection on Aerial Cityscapes Via Augmentation and Yolo Variants","publication_year":2025,"publication_date":"2025-09-14","ids":{"openalex":"https://openalex.org/W7129296275","doi":"https://doi.org/10.1109/icipw68931.2025.11385893"},"language":null,"primary_location":{"id":"doi:10.1109/icipw68931.2025.11385893","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icipw68931.2025.11385893","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Image Processing Workshops (ICIPW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126274313","display_name":"Vu Minh Hieu","orcid":null},"institutions":[{"id":"https://openalex.org/I67868205","display_name":"VNU University of Science","ror":"https://ror.org/05w54hk79","country_code":"VN","type":"education","lineage":["https://openalex.org/I177233841","https://openalex.org/I67868205"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Vu Minh Hieu","raw_affiliation_strings":["VNU University of Science,Hanoi,Vietnam"],"affiliations":[{"raw_affiliation_string":"VNU University of Science,Hanoi,Vietnam","institution_ids":["https://openalex.org/I67868205"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126243151","display_name":"Phan The Son","orcid":null},"institutions":[{"id":"https://openalex.org/I67868205","display_name":"VNU University of Science","ror":"https://ror.org/05w54hk79","country_code":"VN","type":"education","lineage":["https://openalex.org/I177233841","https://openalex.org/I67868205"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Phan The Son","raw_affiliation_strings":["VNU University of Science,Hanoi,Vietnam"],"affiliations":[{"raw_affiliation_string":"VNU University of Science,Hanoi,Vietnam","institution_ids":["https://openalex.org/I67868205"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126213904","display_name":"Nguyen Trong Duc","orcid":null},"institutions":[{"id":"https://openalex.org/I67868205","display_name":"VNU University of Science","ror":"https://ror.org/05w54hk79","country_code":"VN","type":"education","lineage":["https://openalex.org/I177233841","https://openalex.org/I67868205"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Nguyen Trong Duc","raw_affiliation_strings":["VNU University of Science,Hanoi,Vietnam"],"affiliations":[{"raw_affiliation_string":"VNU University of Science,Hanoi,Vietnam","institution_ids":["https://openalex.org/I67868205"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029451971","display_name":"Du Duc Tien","orcid":null},"institutions":[{"id":"https://openalex.org/I4210155906","display_name":"National Institute of Meteorological Sciences","ror":"https://ror.org/04m2hj141","country_code":"KR","type":"facility","lineage":["https://openalex.org/I4210155906"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Du Duc Tien","raw_affiliation_strings":["National Center for Hydro-Meteorological Forecasting,Hanoi,Vietnam"],"affiliations":[{"raw_affiliation_string":"National Center for Hydro-Meteorological Forecasting,Hanoi,Vietnam","institution_ids":["https://openalex.org/I4210155906"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126254643","display_name":"Do Thanh Ha","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095603","display_name":"Vietnam Posts and Telecommunications Group (Vietnam)","ror":"https://ror.org/00q0e7f94","country_code":"VN","type":"company","lineage":["https://openalex.org/I4210095603"]},{"id":"https://openalex.org/I4400600977","display_name":"Posts and Telecommunications Institute of Technology","ror":"https://ror.org/0363rtq22","country_code":null,"type":"education","lineage":["https://openalex.org/I4400600977"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Do Thanh Ha","raw_affiliation_strings":["Posts and Telecommunications Institute of Technology,Hanoi,Vietnam"],"affiliations":[{"raw_affiliation_string":"Posts and Telecommunications Institute of Technology,Hanoi,Vietnam","institution_ids":["https://openalex.org/I4210095603","https://openalex.org/I4400600977"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5126274313"],"corresponding_institution_ids":["https://openalex.org/I67868205"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.70597495,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"43","last_page":"47"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9505000114440918,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9505000114440918,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.010099999606609344,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10689","display_name":"Remote-Sensing Image Classification","score":0.007400000002235174,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.7386000156402588},{"id":"https://openalex.org/keywords/cityscape","display_name":"Cityscape","score":0.6733999848365784},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.6583999991416931},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6233000159263611},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5533999800682068},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4219000041484833},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.4018000066280365},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.3921999931335449}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7408000230789185},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.7386000156402588},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6980999708175659},{"id":"https://openalex.org/C2779685930","wikidata":"https://www.wikidata.org/wiki/Q1935974","display_name":"Cityscape","level":2,"score":0.6733999848365784},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.6583999991416931},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6233000159263611},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5789999961853027},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5533999800682068},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4219000041484833},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.4018000066280365},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.3921999931335449},{"id":"https://openalex.org/C2776429412","wikidata":"https://www.wikidata.org/wiki/Q4688011","display_name":"Aerial image","level":3,"score":0.375900000333786},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3249000012874603},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.30979999899864197},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.2874000072479248},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C203595873","wikidata":"https://www.wikidata.org/wiki/Q25389927","display_name":"Change detection","level":2,"score":0.271699994802475},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.2524000108242035},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icipw68931.2025.11385893","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icipw68931.2025.11385893","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Image Processing Workshops (ICIPW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.8651806116104126}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322108","display_name":"Ministry of Science and Technology","ror":"https://ror.org/032e49973"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2295936755","https://openalex.org/W2962749812","https://openalex.org/W3119214860","https://openalex.org/W3131169764","https://openalex.org/W3170033848","https://openalex.org/W3210586215"],"related_works":[],"abstract_inverted_index":{"This":[0,18,96,130],"paper":[1],"details":[2],"our":[3,117],"submission":[4],"to":[5,39],"the":[6,13,121,142],"IEEE":[7,143],"ICIP":[8,144],"2025":[9],"Grand":[10],"Challenge":[11],"on":[12],"Cityscape":[14],"Aerial":[15],"Image":[16],"Dataset.":[17],"dataset":[19,52],"includes":[20],"top-down":[21],"remote":[22],"sensing":[23],"images":[24],"featuring":[25],"a":[26,54,126,138],"diverse":[27],"range":[28],"of":[29,50,106],"objects,":[30],"from":[31],"large":[32],"structures":[33],"like":[34],"buildings":[35],"and":[36,45,73,79,92,103,113,124],"basketball":[37],"courts":[38,112],"smaller":[40],"elements":[41],"such":[42,109],"as":[43,110],"vehicles":[44],"crosswalks.":[46,114],"A":[47],"key":[48],"characteristic":[49],"this":[51],"is":[53,84,97],"significant":[55],"class":[56,90],"imbalance.":[57],"To":[58],"address":[59],"this,":[60],"we":[61],"propose":[62],"an":[63,85],"object":[64,94],"detection":[65],"framework":[66],"that":[67,88],"leverages":[68],"advanced":[69],"data":[70],"augmentation":[71,86,132],"techniques":[72],"state-of-the-art":[74],"YOLO-based":[75],"models":[76],"(specifically,":[77],"YOLOv11":[78],"YOLOv12).":[80],"Our":[81],"core":[82],"contribution":[83],"strategy":[87,133],"balances":[89],"distributions":[91],"enhances":[93],"diversity.":[95],"achieved":[98],"by":[99],"selectively":[100],"removing,":[101],"augmenting,":[102],"reinserting":[104],"instances":[105],"minority":[107],"classes,":[108],"tennis":[111],"We":[115],"present":[116],"model":[118],"architecture,":[119],"detail":[120],"training":[122],"convergence,":[123],"provide":[125],"comprehensive":[127],"error":[128],"analysis.":[129],"targeted":[131],"proved":[134],"highly":[135],"effective,":[136],"securing":[137],"top-2":[139],"ranking":[140],"in":[141],"competition.":[145]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-02-18T00:00:00"}
