{"id":"https://openalex.org/W7137988274","doi":"https://doi.org/10.1609/aaai.v40i8.37603","title":"SM3Det: A Unified Model for Multi-Modal Remote Sensing Object Detection","display_name":"SM3Det: A Unified Model for Multi-Modal Remote Sensing Object Detection","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137988274","doi":"https://doi.org/10.1609/aaai.v40i8.37603"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i8.37603","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i8.37603","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37603/41565","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37603/41565","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129692576","display_name":"Yuxuan Li","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuxuan Li","raw_affiliation_strings":["Nankai University"],"affiliations":[{"raw_affiliation_string":"Nankai University","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129722839","display_name":"Xiang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiang Li","raw_affiliation_strings":["Nankai University"],"affiliations":[{"raw_affiliation_string":"Nankai University","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126121730","display_name":"Yunheng Li","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunheng Li","raw_affiliation_strings":["Nankai University"],"affiliations":[{"raw_affiliation_string":"Nankai University","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114147475","display_name":"Yicheng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yicheng Zhang","raw_affiliation_strings":["Nankai University"],"affiliations":[{"raw_affiliation_string":"Nankai University","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129719414","display_name":"Yimian Dai","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yimian Dai","raw_affiliation_strings":["Nankai University"],"affiliations":[{"raw_affiliation_string":"Nankai University","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129653984","display_name":"Qibin Hou","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qibin Hou","raw_affiliation_strings":["Nankai University"],"affiliations":[{"raw_affiliation_string":"Nankai University","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129681216","display_name":"Ming-Ming Cheng","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming-Ming Cheng","raw_affiliation_strings":["Nankai University"],"affiliations":[{"raw_affiliation_string":"Nankai University","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129722386","display_name":"Jian Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Yang","raw_affiliation_strings":["Nankai University"],"affiliations":[{"raw_affiliation_string":"Nankai University","institution_ids":["https://openalex.org/I205237279"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5129692576"],"corresponding_institution_ids":["https://openalex.org/I205237279"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26044776,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"8","first_page":"6717","last_page":"6725"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8385999798774719,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8385999798774719,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.08100000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10689","display_name":"Remote-Sensing Image Classification","score":0.030500000342726707,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6808000206947327},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.66839998960495},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5774999856948853},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5680999755859375},{"id":"https://openalex.org/keywords/unified-model","display_name":"Unified Model","score":0.5667999982833862},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5655999779701233},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5230000019073486},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.3637000024318695},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.3625999987125397}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7918000221252441},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6808000206947327},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.66839998960495},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6251000165939331},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5774999856948853},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5680999755859375},{"id":"https://openalex.org/C45493050","wikidata":"https://www.wikidata.org/wiki/Q7884934","display_name":"Unified Model","level":2,"score":0.5667999982833862},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5655999779701233},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5230000019073486},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43160000443458557},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3637000024318695},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3625999987125397},{"id":"https://openalex.org/C183365957","wikidata":"https://www.wikidata.org/wiki/Q17140402","display_name":"Remote sensing application","level":3,"score":0.3617999851703644},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.34139999747276306},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.33309999108314514},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.3292999863624573},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.31790000200271606},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.31709998846054077},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3133000135421753},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.31049999594688416},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.3061000108718872},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.30410000681877136},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.301800012588501},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.3010999858379364},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2775000035762787},{"id":"https://openalex.org/C101814296","wikidata":"https://www.wikidata.org/wiki/Q5439685","display_name":"Feature model","level":3,"score":0.275299996137619},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.257099986076355}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i8.37603","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i8.37603","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37603/41565","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i8.37603","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i8.37603","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37603/41565","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7137988274.pdf","grobid_xml":"https://content.openalex.org/works/W7137988274.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,41,49,92,101,182],"rapid":[2],"advancement":[3],"of":[4,103,166,184],"remote":[5,71],"sensing":[6],"technology,":[7],"high-resolution":[8],"multi-modal":[9,97],"imagery":[10],"is":[11],"now":[12],"more":[13,53],"widely":[14],"accessible.":[15],"Conventional":[16],"object":[17,127],"detection":[18],"models":[19,186],"are":[20],"trained":[21],"on":[22,187],"a":[23,29,59,111,116,131,152],"single":[24],"dataset,":[25],"often":[26],"restricted":[27],"to":[28,74,90,136,161],"specific":[30],"imaging":[31],"modality":[32],"and":[33,47,65,99,114,125,155,171,178],"annotation":[34],"format.":[35],"However,":[36],"such":[37],"an":[38],"approach":[39],"overlooks":[40],"valuable":[42],"shared":[43],"knowledge":[44,139],"across":[45,169],"multi-modalities":[46],"limits":[48],"model\u2019s":[50],"applicability":[51],"in":[52,95],"versatile":[54],"scenarios.":[55],"This":[56,85],"paper":[57],"introduces":[58],"new":[60],"task":[61,86],"called":[62],"Multi-Modal":[63,123],"Datasets":[64],"Multi-Task":[66,126],"Object":[67],"Detection":[68],"(M2Det)":[69],"for":[70,122,146],"sensing,":[72],"designed":[73],"accurately":[75],"detect":[76],"horizontal":[77],"or":[78],"oriented":[79],"objects":[80],"from":[81],"any":[82],"sensor":[83],"modality.":[84],"poses":[87],"challenges":[88],"due":[89],"1)":[91],"trade-offs":[93],"involved":[94],"managing":[96],"modelling":[98],"2)":[100],"complexities":[102],"multi-task":[104],"optimization.":[105],"To":[106],"address":[107],"these,":[108],"we":[109,150],"establish":[110],"benchmark":[112],"dataset":[113],"propose":[115,151],"unified":[117],"model,":[118],"SM3Det":[119,129],"(Single":[120],"Model":[121],"datasets":[124],"Detection).":[128],"leverages":[130],"grid-level":[132],"sparse":[133],"MoE":[134],"backbone":[135],"enable":[137],"joint":[138],"learning":[140,167],"while":[141],"preserving":[142],"distinct":[143],"feature":[144],"representations":[145],"different":[147],"modalities.":[148],"Furthermore,":[149],"novel":[153],"consistency":[154],"synchronization":[156],"optimization":[157],"mechanism,":[158],"allowing":[159],"it":[160],"effectively":[162],"handle":[163],"varying":[164],"levels":[165],"difficulty":[168],"modalities":[170],"tasks.":[172],"Extensive":[173],"experiments":[174],"demonstrate":[175],"SM3Det's":[176],"effectiveness":[177],"generalizability,":[179],"consistently":[180],"outperforming":[181],"combination":[183],"specialized":[185],"individual":[188],"datasets.":[189]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
