{"id":"https://openalex.org/W4399418875","doi":"https://doi.org/10.1145/3651671.3651695","title":"A Study on Semantic Segmentation for Small Objects in High-resolution Aerial Images based on Mask R-CNN and HRNet","display_name":"A Study on Semantic Segmentation for Small Objects in High-resolution Aerial Images based on Mask R-CNN and HRNet","publication_year":2024,"publication_date":"2024-02-02","ids":{"openalex":"https://openalex.org/W4399418875","doi":"https://doi.org/10.1145/3651671.3651695"},"language":"en","primary_location":{"id":"doi:10.1145/3651671.3651695","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3651671.3651695","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 16th International Conference on Machine Learning and Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059133801","display_name":"Sicong Wang","orcid":"https://orcid.org/0009-0009-8367-9146"},"institutions":[{"id":"https://openalex.org/I22716506","display_name":"Lanzhou University of Technology","ror":"https://ror.org/03panb555","country_code":"CN","type":"education","lineage":["https://openalex.org/I22716506"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Sicong Wang","raw_affiliation_strings":["School of Computer and Communication , Lanzhou University of Technology, China"],"raw_orcid":"https://orcid.org/0009-0009-8367-9146","affiliations":[{"raw_affiliation_string":"School of Computer and Communication , Lanzhou University of Technology, China","institution_ids":["https://openalex.org/I22716506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009989806","display_name":"Yujie Yin","orcid":"https://orcid.org/0009-0006-6045-0929"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yujie Yin","raw_affiliation_strings":["School of Automation, Nanjing University of Aeronautics and Astronautics, China"],"raw_orcid":"https://orcid.org/0009-0006-6045-0929","affiliations":[{"raw_affiliation_string":"School of Automation, Nanjing University of Aeronautics and Astronautics, China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058166379","display_name":"Longrui Yang","orcid":"https://orcid.org/0009-0005-9858-0772"},"institutions":[{"id":"https://openalex.org/I22716506","display_name":"Lanzhou University of Technology","ror":"https://ror.org/03panb555","country_code":"CN","type":"education","lineage":["https://openalex.org/I22716506"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longrui Yang","raw_affiliation_strings":["College of Electrical and Information Engineering , Lanzhou University of Technology, China"],"raw_orcid":"https://orcid.org/0009-0005-9858-0772","affiliations":[{"raw_affiliation_string":"College of Electrical and Information Engineering , Lanzhou University of Technology, China","institution_ids":["https://openalex.org/I22716506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057125017","display_name":"Xiyuan Wang","orcid":"https://orcid.org/0009-0008-0606-0190"},"institutions":[{"id":"https://openalex.org/I22716506","display_name":"Lanzhou University of Technology","ror":"https://ror.org/03panb555","country_code":"CN","type":"education","lineage":["https://openalex.org/I22716506"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiyuan Wang","raw_affiliation_strings":["College of Electrical and Information Engineering , Lanzhou University of Technology, China"],"raw_orcid":"https://orcid.org/0009-0008-0606-0190","affiliations":[{"raw_affiliation_string":"College of Electrical and Information Engineering , Lanzhou University of Technology, China","institution_ids":["https://openalex.org/I22716506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113410643","display_name":"Xijun Zhang","orcid":"https://orcid.org/0009-0006-2516-3692"},"institutions":[{"id":"https://openalex.org/I22716506","display_name":"Lanzhou University of Technology","ror":"https://ror.org/03panb555","country_code":"CN","type":"education","lineage":["https://openalex.org/I22716506"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xijun Zhang","raw_affiliation_strings":["School of Computer and Communication , Lanzhou University of Technology, China"],"raw_orcid":"https://orcid.org/0009-0006-2516-3692","affiliations":[{"raw_affiliation_string":"School of Computer and Communication , Lanzhou University of Technology, China","institution_ids":["https://openalex.org/I22716506"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5059133801"],"corresponding_institution_ids":["https://openalex.org/I22716506"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06982422,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"363","last_page":"368"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11211","display_name":"3D Surveying and Cultural Heritage","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1907","display_name":"Geology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8220089673995972},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7952430248260498},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7759077548980713},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5767750144004822},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.5346020460128784},{"id":"https://openalex.org/keywords/residual-neural-network","display_name":"Residual neural network","score":0.5091690421104431},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.4953674077987671},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.49066436290740967},{"id":"https://openalex.org/keywords/high-resolution","display_name":"High resolution","score":0.4901607930660248},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.480984091758728},{"id":"https://openalex.org/keywords/aerial-image","display_name":"Aerial image","score":0.4665173292160034},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.43037691712379456},{"id":"https://openalex.org/keywords/resolution","display_name":"Resolution (logic)","score":0.4176177382469177},{"id":"https://openalex.org/keywords/image-resolution","display_name":"Image resolution","score":0.4121581017971039},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.32544246315956116},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.32050323486328125},{"id":"https://openalex.org/keywords/remote-sensing","display_name":"Remote sensing","score":0.09863710403442383},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.0903349220752716}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8220089673995972},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7952430248260498},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7759077548980713},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5767750144004822},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.5346020460128784},{"id":"https://openalex.org/C2944601119","wikidata":"https://www.wikidata.org/wiki/Q43744058","display_name":"Residual neural network","level":3,"score":0.5091690421104431},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.4953674077987671},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.49066436290740967},{"id":"https://openalex.org/C3020199158","wikidata":"https://www.wikidata.org/wiki/Q210521","display_name":"High resolution","level":2,"score":0.4901607930660248},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.480984091758728},{"id":"https://openalex.org/C2776429412","wikidata":"https://www.wikidata.org/wiki/Q4688011","display_name":"Aerial image","level":3,"score":0.4665173292160034},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.43037691712379456},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.4176177382469177},{"id":"https://openalex.org/C205372480","wikidata":"https://www.wikidata.org/wiki/Q210521","display_name":"Image resolution","level":2,"score":0.4121581017971039},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.32544246315956116},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.32050323486328125},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.09863710403442383},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0903349220752716},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3651671.3651695","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3651671.3651695","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 16th International Conference on Machine Learning and Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W2963150697","https://openalex.org/W3107337657","https://openalex.org/W4244186909"],"related_works":["https://openalex.org/W2599472179","https://openalex.org/W1989735375","https://openalex.org/W4323057981","https://openalex.org/W3178607569","https://openalex.org/W4310870954","https://openalex.org/W4308408209","https://openalex.org/W4301783946","https://openalex.org/W3196952692","https://openalex.org/W2034727732","https://openalex.org/W2076075893"],"abstract_inverted_index":{"Due":[0],"to":[1,33,90,102],"the":[2,11,18,78,96,109,133],"high":[3],"resolution":[4,66],"of":[5,21,24,40,75,85,112,135,138,151],"aerial":[6,44,142],"images,":[7],"small":[8,25,41,139],"objects":[9,26,42,140],"in":[10,43,58,141,146],"images":[12,45,143],"occupy":[13],"very":[14,28],"few":[15],"pixels.":[16],"Additionally,":[17],"surface":[19],"features":[20],"some":[22],"types":[23],"are":[27],"similar,":[29],"making":[30],"them":[31],"difficult":[32],"distinguish.":[34],"These":[35],"factors":[36],"make":[37],"semantic":[38,105,136],"segmentation":[39,106,137],"a":[46],"challenging":[47],"task":[48],"with":[49,61,108,122],"poor":[50],"performance.":[51,124],"In":[52],"this":[53,128],"paper,":[54],"we":[55],"replaced":[56],"ResNet":[57],"Mask":[59],"R-CNN":[60],"HRNet,":[62],"which":[63,81],"has":[64],"better":[65,147],"preservation":[67],"ability.":[68],"The":[69],"proposed":[70],"method":[71,118,129],"achieved":[72],"an":[73,83],"mIoU":[74],"68.06":[76],"on":[77],"iSAID":[79],"dataset,":[80],"is":[82],"improvement":[84],"13.02%":[86],"and":[87,93,114,144,149],"10.94%":[88],"compared":[89,101],"using":[91],"ResNet-50":[92],"ResNet-101":[94],"as":[95],"backbone":[97],"network,":[98],"respectively.":[99],"Moreover,":[100],"other":[103],"advanced":[104],"algorithms":[107],"same":[110],"number":[111],"parameters":[113],"computational":[115],"complexity,":[116],"our":[117],"achieves":[119],"higher":[120],"accuracy":[121,134],"similar":[123],"We":[125],"expect":[126],"that":[127],"can":[130],"help":[131],"improve":[132],"assist":[145],"identification":[148],"localization":[150],"these":[152],"objects.":[153]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
