{"id":"https://openalex.org/W7133208196","doi":"https://doi.org/10.1145/3797273","title":"MonoLS: Multi-Scale Feature Fusion and Spatially-Aware Attention for Monocular 3D Object Detection","display_name":"MonoLS: Multi-Scale Feature Fusion and Spatially-Aware Attention for Monocular 3D Object Detection","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7133208196","doi":"https://doi.org/10.1145/3797273"},"language":"en","primary_location":{"id":"doi:10.1145/3797273","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3797273","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087225784","display_name":"Honghao Gao","orcid":"https://orcid.org/0000-0001-6861-9684"},"institutions":[{"id":"https://openalex.org/I141962983","display_name":"Shanghai University of Engineering Science","ror":"https://ror.org/0557b9y08","country_code":"CN","type":"education","lineage":["https://openalex.org/I141962983"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Honghao Gao","raw_affiliation_strings":["School of Computer Engineering and Science, Shanghai University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-6861-9684","affiliations":[{"raw_affiliation_string":"School of Computer Engineering and Science, Shanghai University, Shanghai, China","institution_ids":["https://openalex.org/I141962983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127788768","display_name":"Dubin Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I141962983","display_name":"Shanghai University of Engineering Science","ror":"https://ror.org/0557b9y08","country_code":"CN","type":"education","lineage":["https://openalex.org/I141962983"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dubin Feng","raw_affiliation_strings":["School of Computer Engineering and Science, Shanghai University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0001-5785-140X","affiliations":[{"raw_affiliation_string":"School of Computer Engineering and Science, Shanghai University, Shanghai, China","institution_ids":["https://openalex.org/I141962983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100423485","display_name":"Ye Wang","orcid":"https://orcid.org/0000-0003-1454-2161"},"institutions":[{"id":"https://openalex.org/I141962983","display_name":"Shanghai University of Engineering Science","ror":"https://ror.org/0557b9y08","country_code":"CN","type":"education","lineage":["https://openalex.org/I141962983"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Wang","raw_affiliation_strings":["School of Computer Engineering and Science, Shanghai University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-1454-2161","affiliations":[{"raw_affiliation_string":"School of Computer Engineering and Science, Shanghai University, Shanghai, China","institution_ids":["https://openalex.org/I141962983"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhihao Pan","orcid":"https://orcid.org/0009-0004-4334-6711"},"institutions":[{"id":"https://openalex.org/I141962983","display_name":"Shanghai University of Engineering Science","ror":"https://ror.org/0557b9y08","country_code":"CN","type":"education","lineage":["https://openalex.org/I141962983"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhihao Pan","raw_affiliation_strings":["School of Computer Engineering and Science, Shanghai University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0004-4334-6711","affiliations":[{"raw_affiliation_string":"School of Computer Engineering and Science, Shanghai University, Shanghai, China","institution_ids":["https://openalex.org/I141962983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059184595","display_name":"Yueshen Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yueshen Xu","raw_affiliation_strings":["School of Computer Science and Technology, Xidian University, Xi\u2019an, China"],"raw_orcid":"https://orcid.org/0000-0001-7210-0543","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xidian University, Xi\u2019an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043988868","display_name":"Bader Fahad Alkhamees","orcid":"https://orcid.org/0000-0001-7479-7102"},"institutions":[{"id":"https://openalex.org/I28022161","display_name":"King Saud University","ror":"https://ror.org/02f81g417","country_code":"SA","type":"education","lineage":["https://openalex.org/I28022161"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Bader Fahad Alkhamees","raw_affiliation_strings":["Department of Information Systems, King Saud University, Riyadh, Saudi Arabia"],"raw_orcid":"https://orcid.org/0000-0001-7479-7102","affiliations":[{"raw_affiliation_string":"Department of Information Systems, King Saud University, Riyadh, Saudi Arabia","institution_ids":["https://openalex.org/I28022161"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5087225784"],"corresponding_institution_ids":["https://openalex.org/I141962983"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.39537295,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"22","issue":"4","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9672999978065491,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9672999978065491,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.003800000064074993,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.0027000000700354576,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.6825000047683716},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.650600016117096},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6474000215530396},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.5940999984741211},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5874000191688538},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5152999758720398},{"id":"https://openalex.org/keywords/monocular-vision","display_name":"Monocular vision","score":0.5041000247001648},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.49729999899864197},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.48649999499320984}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.862500011920929},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.8158000111579895},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7038999795913696},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.6825000047683716},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.650600016117096},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6474000215530396},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.5940999984741211},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5874000191688538},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5152999758720398},{"id":"https://openalex.org/C158829959","wikidata":"https://www.wikidata.org/wiki/Q1640606","display_name":"Monocular vision","level":2,"score":0.5041000247001648},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.49729999899864197},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.48649999499320984},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.45989999175071716},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.43389999866485596},{"id":"https://openalex.org/C51399673","wikidata":"https://www.wikidata.org/wiki/Q504027","display_name":"Lidar","level":2,"score":0.412200003862381},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3880000114440918},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.36579999327659607},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.34380000829696655},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3167000114917755},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C126422989","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature detection (computer vision)","level":4,"score":0.3019999861717224},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.28040000796318054},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.2775000035762787},{"id":"https://openalex.org/C52672216","wikidata":"https://www.wikidata.org/wiki/Q1749840","display_name":"Depth perception","level":3,"score":0.26260000467300415}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3797273","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3797273","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6181968682","display_name":null,"funder_award_id":"92367103","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2560544142","https://openalex.org/W2565639579","https://openalex.org/W2752782242","https://openalex.org/W2922509574","https://openalex.org/W2963323244","https://openalex.org/W2963351448","https://openalex.org/W3034552520","https://openalex.org/W3144064738","https://openalex.org/W4317496717","https://openalex.org/W4390813021","https://openalex.org/W4399728007","https://openalex.org/W4400578788","https://openalex.org/W4406457520"],"related_works":[],"abstract_inverted_index":{"3D":[0,59,74],"object":[1,75,100],"detection":[2,76,123],"plays":[3],"a":[4,72,129,137,187],"pivotal":[5],"role":[6],"in":[7,12,26,51],"facilitating":[8],"comprehensive":[9],"scene":[10],"understanding":[11],"autonomous":[13],"driving":[14],"systems.":[15],"One":[16],"of":[17,49,58,93,191],"its":[18],"key":[19],"challenges":[20],"is":[21],"to":[22,44,89,140,161,193],"achieve":[23],"accurate":[24,56],"perception":[25],"complex":[27],"environments.":[28],"Compared":[29],"with":[30,132],"LiDAR":[31],"systems":[32],"and":[33,42,84,109,144,170],"stereo-vision":[34],"approaches,":[35],"monocular":[36,52,64,73],"camera-based":[37],"solutions":[38],"are":[39,66,158],"more":[40],"cost-effective":[41],"easier":[43],"deploy.":[45],"However,":[46],"the":[47,55,91,133,145,176,184],"absence":[48],"depth":[50,95],"images":[53,65],"hinders":[54],"localization":[57],"bounding":[60],"boxes":[61],"when":[62],"only":[63],"used.":[67],"This":[68,112],"work":[69],"proposes":[70],"MonoLS,":[71],"framework":[77],"that":[78,166,180],"incorporates":[79],"lightweight":[80,103],"multi-scale":[81,104,117],"feature":[82,105,118,164],"fusion":[83,106],"spatially-aware":[85,126],"attention.":[86,154],"It":[87],"aims":[88],"address":[90],"challenge":[92],"missing":[94],"information":[96,151],"while":[97],"achieving":[98,186],"precise":[99],"localization.":[101],"First,":[102],"combines":[107],"deep":[108],"shallow":[110],"features.":[111],"design":[113],"allows":[114],"for":[115],"effective":[116],"extraction":[119],"without":[120],"compromising":[121],"real-time":[122,188],"capabilities.":[124],"Second,":[125],"attention":[127,139],"employs":[128],"dual-branch":[130],"structure,":[131],"spatial":[134,142,168],"branch":[135,147],"using":[136],"triplet":[138],"capture":[141],"details,":[143],"context":[146,150],"aggregating":[148],"global":[149,153],"through":[152],"These":[155],"two":[156],"branches":[157],"subsequently":[159],"fused":[160],"produce":[162],"enhanced":[163],"representations":[165],"preserve":[167],"distribution":[169],"semantic":[171],"richness.":[172],"Finally,":[173],"experiments":[174],"on":[175],"KITTI":[177],"dataset":[178],"demonstrate":[179],"our":[181],"method":[182],"outperforms":[183],"baseline,":[185],"inference":[189],"speed":[190],"up":[192],"67":[194],"FPS.":[195]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-03-03T00:00:00"}
