{"id":"https://openalex.org/W4416136544","doi":"https://doi.org/10.1109/ipta66025.2025.11222002","title":"Decoding UAV Scenes: A Novel Framework for Deep Semantic Segmentation Using U-Net and Transformer Hybrids","display_name":"Decoding UAV Scenes: A Novel Framework for Deep Semantic Segmentation Using U-Net and Transformer Hybrids","publication_year":2025,"publication_date":"2025-10-13","ids":{"openalex":"https://openalex.org/W4416136544","doi":"https://doi.org/10.1109/ipta66025.2025.11222002"},"language":null,"primary_location":{"id":"doi:10.1109/ipta66025.2025.11222002","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipta66025.2025.11222002","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Fourteenth International Conference on Image Processing, Theory, Tools &amp;amp; Applications (IPTA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056889379","display_name":"Syed Muhammad Usman","orcid":"https://orcid.org/0000-0002-0504-3558"},"institutions":[{"id":"https://openalex.org/I59225215","display_name":"Bahria University","ror":"https://ror.org/02v8d7770","country_code":"PK","type":"education","lineage":["https://openalex.org/I59225215"]}],"countries":["PK"],"is_corresponding":true,"raw_author_name":"Syed Muhammad Usman","raw_affiliation_strings":["Bahria University,Department of Computer Science,Islamabad,Pakistan"],"affiliations":[{"raw_affiliation_string":"Bahria University,Department of Computer Science,Islamabad,Pakistan","institution_ids":["https://openalex.org/I59225215"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014991694","display_name":"Arif ul Maula Khan","orcid":"https://orcid.org/0000-0002-9956-5479"},"institutions":[{"id":"https://openalex.org/I899713450","display_name":"Air University","ror":"https://ror.org/03yfe9v83","country_code":"PK","type":"education","lineage":["https://openalex.org/I899713450"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Ali Hamza Khan","raw_affiliation_strings":["Air University,Department of Creative Technologies,Islamabad,Pakistan"],"affiliations":[{"raw_affiliation_string":"Air University,Department of Creative Technologies,Islamabad,Pakistan","institution_ids":["https://openalex.org/I899713450"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057992025","display_name":"Nida Khalid","orcid":null},"institutions":[{"id":"https://openalex.org/I59225215","display_name":"Bahria University","ror":"https://ror.org/02v8d7770","country_code":"PK","type":"education","lineage":["https://openalex.org/I59225215"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Nida Khalid","raw_affiliation_strings":["Bahria University,Department of Computer Science,Islamabad,Pakistan"],"affiliations":[{"raw_affiliation_string":"Bahria University,Department of Computer Science,Islamabad,Pakistan","institution_ids":["https://openalex.org/I59225215"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034050364","display_name":"Qasim Mehmood","orcid":"https://orcid.org/0000-0002-9274-7421"},"institutions":[{"id":"https://openalex.org/I59225215","display_name":"Bahria University","ror":"https://ror.org/02v8d7770","country_code":"PK","type":"education","lineage":["https://openalex.org/I59225215"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Qasim Mehmood","raw_affiliation_strings":["Bahria University,Department of Computer Science,Islamabad,Pakistan"],"affiliations":[{"raw_affiliation_string":"Bahria University,Department of Computer Science,Islamabad,Pakistan","institution_ids":["https://openalex.org/I59225215"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5056889379"],"corresponding_institution_ids":["https://openalex.org/I59225215"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.46341258,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11133","display_name":"UAV Applications and Optimization","score":0.7462999820709229,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11133","display_name":"UAV Applications and Optimization","score":0.7462999820709229,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.08079999685287476,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11211","display_name":"3D Surveying and Cultural Heritage","score":0.02930000051856041,"subfield":{"id":"https://openalex.org/subfields/1907","display_name":"Geology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7609999775886536},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6937000155448914},{"id":"https://openalex.org/keywords/drone","display_name":"Drone","score":0.5892999768257141},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5745000243186951},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5307000279426575},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.46540001034736633},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.4641000032424927},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.4593999981880188}],"concepts":[{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7609999775886536},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.699999988079071},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6937000155448914},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6486999988555908},{"id":"https://openalex.org/C59519942","wikidata":"https://www.wikidata.org/wiki/Q650665","display_name":"Drone","level":2,"score":0.5892999768257141},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5745000243186951},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5307000279426575},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.46540001034736633},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.4641000032424927},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.4593999981880188},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.45879998803138733},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3882000148296356},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.3785000145435333},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.3488999903202057},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33640000224113464},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31380000710487366},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2946000099182129},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2784999907016754},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2685000002384186},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2639999985694885},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.2606000006198883},{"id":"https://openalex.org/C98083399","wikidata":"https://www.wikidata.org/wiki/Q3246517","display_name":"Underwater","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ipta66025.2025.11222002","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipta66025.2025.11222002","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Fourteenth International Conference on Image Processing, Theory, Tools &amp;amp; Applications (IPTA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W2618412219","https://openalex.org/W2737129951","https://openalex.org/W2767862515","https://openalex.org/W2809426059","https://openalex.org/W2814568980","https://openalex.org/W2886397424","https://openalex.org/W2889749348","https://openalex.org/W2894369069","https://openalex.org/W2901506785","https://openalex.org/W2909156520","https://openalex.org/W2930359273","https://openalex.org/W2950604226","https://openalex.org/W2963606888","https://openalex.org/W2963659230","https://openalex.org/W2964011874","https://openalex.org/W2972009374","https://openalex.org/W2992559558","https://openalex.org/W3008352878","https://openalex.org/W3014055653","https://openalex.org/W3028752951","https://openalex.org/W3049335070","https://openalex.org/W3062776680","https://openalex.org/W3081531022","https://openalex.org/W3093647646","https://openalex.org/W3110908156","https://openalex.org/W3155788091","https://openalex.org/W3158613395","https://openalex.org/W3173420347","https://openalex.org/W3184861222","https://openalex.org/W3211232398","https://openalex.org/W4389262616","https://openalex.org/W4393397443"],"related_works":[],"abstract_inverted_index":{"The":[0,172],"integration":[1],"of":[2,13,123,127,133,145,149,161,167,185,189,202],"UAV":[3],"operations":[4],"with":[5,73,78,119,141,158,181],"remote":[6],"sensing":[7],"technology":[8],"has":[9],"a":[10,114,120,142,182],"wide":[11],"range":[12],"applications,":[14],"including":[15,71],"urban":[16],"planning":[17],"and":[18,41,59,86,100,110,129,147,153,163,165,169,175,187],"environmental":[19],"monitoring.":[20],"This":[21],"research":[22,68],"addresses":[23],"the":[24,30,66,130,196],"challenges":[25],"in":[26],"semantic":[27,200],"segmentation":[28,201],"using":[29,93],"Varied":[31],"Drone":[32],"Dataset":[33],"(VDD),":[34],"which":[35],"contains":[36],"diverse":[37],"landscapes,":[38],"camera":[39],"perspectives,":[40],"weather":[42],"conditions.":[43],"Despite":[44],"its":[45,63],"richness,":[46],"VDD":[47],"suffers":[48],"from":[49],"low":[50],"Intersection":[51],"over":[52],"Union":[53],"(IoU)":[54],"values,":[55],"unbalanced":[56],"class":[57],"distribution,":[58],"limited":[60],"accuracy,":[61],"reducing":[62],"usability.":[64],"In":[65],"proposed":[67],"work,":[69],"models":[70],"Segmenter":[72],"Transformer":[74],"(SegFormer),":[75],"Mask":[76,151],"Regions":[77],"Convolutional":[79],"Neural":[80,88],"Networks":[81],"(R-CNN),":[82],"U-shaped":[83],"Network":[84,89],"(U-Net),":[85],"Efficient":[87],"(E-Net)":[90],"are":[91],"evaluated":[92],"metrics":[94],"such":[95],"as":[96],"IoU,":[97],"precision,":[98],"recall,":[99],"F1-score.":[101],"A":[102],"comprehensive":[103],"experimental":[104],"setup":[105],"includes":[106],"strong":[107],"train-test":[108],"splits":[109],"data":[111],"augmentation.":[112],"As":[113],"result":[115],"U-Net":[116,174,192],"performed":[117],"best,":[118],"test":[121,125,143,159,183],"loss":[122,144,184],"0.614,":[124],"accuracy":[126,148,188],"0.861,":[128],"highest":[131],"IoU":[132],"0.584,":[134],"effectively":[135],"learning":[136],"complex":[137],"patterns.":[138],"E-Net":[139],"followed":[140],"0.652":[146],"0.853.":[150],"R-CNN":[152],"SegFormer":[154,176],"showed":[155],"lower":[156],"performance,":[157],"losses":[160],"1.767":[162],"2.504,":[164],"accuracies":[166],"0.808":[168],"0.797,":[170],"respectively.":[171],"hybrid":[173],"model":[177],"yielded":[178],"moderate":[179],"results":[180],"1.301":[186],"0.795.":[190],"Overall,":[191],"proved":[193],"to":[194],"be":[195],"most":[197],"optimal":[198],"for":[199],"drone":[203],"imagery.":[204]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-10T00:00:00"}
