{"id":"https://openalex.org/W4408354986","doi":"https://doi.org/10.1109/icassp49660.2025.10888740","title":"MFT: Modal Fusion Transformer for Cross-Modal Fusion in 3D Object Detection","display_name":"MFT: Modal Fusion Transformer for Cross-Modal Fusion in 3D Object Detection","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408354986","doi":"https://doi.org/10.1109/icassp49660.2025.10888740"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10888740","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888740","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080472154","display_name":"Haojie Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haojie Cai","raw_affiliation_strings":["Shenzhen University,Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen University,Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003762648","display_name":"Dongfu Yin","orcid":"https://orcid.org/0000-0001-8117-4341"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongfu Yin","raw_affiliation_strings":["Shenzhen University,Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen University,Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003996424","display_name":"Fei Yu","orcid":"https://orcid.org/0000-0002-3091-7640"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Yu","raw_affiliation_strings":["Shenzhen University,Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen University,Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042413003","display_name":"Siting Xiong","orcid":"https://orcid.org/0000-0002-1054-121X"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siting Xiong","raw_affiliation_strings":["Shenzhen University,Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen University,Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China","institution_ids":["https://openalex.org/I180726961"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5080472154"],"corresponding_institution_ids":["https://openalex.org/I180726961"],"apc_list":null,"apc_paid":null,"fwci":6.8727,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.95239498,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12389","display_name":"Infrared Target Detection Methodologies","score":0.9243000149726868,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12389","display_name":"Infrared Target Detection Methodologies","score":0.9243000149726868,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.920799970626831,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.7182573676109314},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.6706522703170776},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5913891196250916},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5313433408737183},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3356623351573944},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1861889660358429},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.13339394330978394},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.11858475208282471},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.10916075110435486}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.7182573676109314},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.6706522703170776},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5913891196250916},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5313433408737183},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3356623351573944},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1861889660358429},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.13339394330978394},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.11858475208282471},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.10916075110435486},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10888740","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888740","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2897529137","https://openalex.org/W2963400571","https://openalex.org/W2970987838","https://openalex.org/W3035574168","https://openalex.org/W3096609285","https://openalex.org/W3138516171","https://openalex.org/W3170030651","https://openalex.org/W3209639308","https://openalex.org/W4312312588","https://openalex.org/W4312707458","https://openalex.org/W4383066393","https://openalex.org/W4383097697","https://openalex.org/W4385245566","https://openalex.org/W4390872346","https://openalex.org/W4390872833","https://openalex.org/W4390874049","https://openalex.org/W4390874155","https://openalex.org/W4390874213","https://openalex.org/W4401415883","https://openalex.org/W6757817989","https://openalex.org/W6760424586","https://openalex.org/W6767379092","https://openalex.org/W6779586474","https://openalex.org/W6784094891","https://openalex.org/W6799331316","https://openalex.org/W6802311648","https://openalex.org/W6803556390","https://openalex.org/W6838873368","https://openalex.org/W6846908210","https://openalex.org/W6859231248"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Increasing":[0],"attention":[1,154],"has":[2],"been":[3],"garnered":[4],"by":[5,47,140],"LiDAR":[6,69,121],"points":[7,70],"and":[8,71,87,107,123,128,172],"multi-view":[9,72,124],"images":[10,73,134],"fusion":[11,28,56],"based":[12,57],"on":[13],"Transformer":[14,66],"for":[15,26,59,100],"supplementing":[16],"another":[17],"modality":[18],"in":[19,111],"3D":[20,60],"object":[21,61,109,132],"detection.":[22],"However,":[23],"challenges":[24],"persist":[25],"cross-modal":[27],"methods":[29],"due":[30],"to":[31,39,137,159],"the":[32,103,115,161],"heterogeneity":[33],"of":[34,117,131],"these":[35],"two":[36],"modalities,":[37],"leading":[38],"issues":[40],"such":[41],"as":[42],"inaccurate":[43],"detection":[44,62],"results":[45],"encountered":[46],"Transformer-based":[48],"methods.":[49],"In":[50],"this":[51],"work,":[52],"a":[53,78,148,179],"one-way":[54],"mid-level":[55],"framework":[58],"named":[63,155],"Modal":[64,89,149],"Fusion":[65,150],"(MFT)":[67],"using":[68],"is":[74,98,135,157],"introduced.":[75],"It":[76],"comprises":[77],"Depth-Guided":[79],"Generation(DGG)":[80],"module,":[81],"Position":[82],"Encoding":[83],"Generation":[84],"(PEG)":[85],"module":[86],"Cross":[88],"Fusion(CMF)":[90],"module.":[91],"Specifically,":[92],"depth":[93,105],"information":[94,130],"from":[95,120,133],"point":[96,138],"cloud":[97],"utilized":[99],"both":[101],"gathering":[102],"image":[104,125],"map":[106],"initializing":[108],"queries":[110],"DGG.":[112],"PEG":[113],"unifies":[114],"form":[116],"position":[118,129],"encoding":[119],"features":[122],"features.":[126],"Depth":[127],"aggregated":[136],"clouds":[139],"CMF,":[141],"which":[142],"fully":[143],"explores":[144],"dual-modal":[145],"information.":[146],"Furthermore,":[147],"Network":[151],"with":[152,167],"deformable":[153],"fast-MFT":[156,173],"introduced":[158],"reduce":[160],"relatively":[162],"large":[163],"computational":[164],"cost":[165],"associated":[166],"global":[168],"attention.":[169],"Our":[170],"MFT":[171],"achieve":[174],"competitive":[175],"performance":[176],"while":[177],"maintaining":[178],"faster":[180],"inference":[181],"speed":[182],"than":[183],"other":[184],"models.":[185]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
