{"id":"https://openalex.org/W4399939098","doi":"https://doi.org/10.1109/tcsvt.2024.3418965","title":"MMI-Det: Exploring Multi-Modal Integration for Visible and Infrared Object Detection","display_name":"MMI-Det: Exploring Multi-Modal Integration for Visible and Infrared Object Detection","publication_year":2024,"publication_date":"2024-06-24","ids":{"openalex":"https://openalex.org/W4399939098","doi":"https://doi.org/10.1109/tcsvt.2024.3418965"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3418965","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3418965","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013582115","display_name":"Yuqiao Zeng","orcid":"https://orcid.org/0009-0004-1382-9448"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuqiao Zeng","raw_affiliation_strings":["Key Laboratory of Big Data and Artificial Intelligence in Transportation, Ministry of Education, the State Key Laboratory of Advanced Rail Autonomous Operation, and the School of Computer and Information Technology, Beijing Jiaotong University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Big Data and Artificial Intelligence in Transportation, Ministry of Education, the State Key Laboratory of Advanced Rail Autonomous Operation, and the School of Computer and Information Technology, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062309968","display_name":"Tengfei Liang","orcid":"https://orcid.org/0000-0001-8193-3096"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tengfei Liang","raw_affiliation_strings":["Key Laboratory of Big Data and Artificial Intelligence in Transportation, Ministry of Education, the State Key Laboratory of Advanced Rail Autonomous Operation, and the School of Computer and Information Technology, Beijing Jiaotong University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Big Data and Artificial Intelligence in Transportation, Ministry of Education, the State Key Laboratory of Advanced Rail Autonomous Operation, and the School of Computer and Information Technology, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033896100","display_name":"Yi Jin","orcid":"https://orcid.org/0000-0001-8408-3816"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Jin","raw_affiliation_strings":["Key Laboratory of Big Data and Artificial Intelligence in Transportation, Ministry of Education, the State Key Laboratory of Advanced Rail Autonomous Operation, and the School of Computer and Information Technology, Beijing Jiaotong University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Big Data and Artificial Intelligence in Transportation, Ministry of Education, the State Key Laboratory of Advanced Rail Autonomous Operation, and the School of Computer and Information Technology, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010019122","display_name":"Yidong Li","orcid":"https://orcid.org/0000-0003-2965-6196"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yidong Li","raw_affiliation_strings":["Key Laboratory of Big Data and Artificial Intelligence in Transportation, Ministry of Education, the State Key Laboratory of Advanced Rail Autonomous Operation, and the School of Computer and Information Technology, Beijing Jiaotong University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Big Data and Artificial Intelligence in Transportation, Ministry of Education, the State Key Laboratory of Advanced Rail Autonomous Operation, and the School of Computer and Information Technology, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5013582115"],"corresponding_institution_ids":["https://openalex.org/I21193070"],"apc_list":null,"apc_paid":null,"fwci":42.7497,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.99772744,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"34","issue":"11","first_page":"11198","last_page":"11213"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12389","display_name":"Infrared Target Detection Methodologies","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12389","display_name":"Infrared Target Detection Methodologies","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5885464549064636},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.575873851776123},{"id":"https://openalex.org/keywords/infrared","display_name":"Infrared","score":0.5477626919746399},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.5356048941612244},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5193303823471069},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5192644596099854},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4104112386703491},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.2927623391151428},{"id":"https://openalex.org/keywords/optics","display_name":"Optics","score":0.2487964928150177},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.17717349529266357},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.11031222343444824}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5885464549064636},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.575873851776123},{"id":"https://openalex.org/C158355884","wikidata":"https://www.wikidata.org/wiki/Q11388","display_name":"Infrared","level":2,"score":0.5477626919746399},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.5356048941612244},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5193303823471069},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5192644596099854},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4104112386703491},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2927623391151428},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.2487964928150177},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.17717349529266357},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.11031222343444824},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3418965","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3418965","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1381354293","display_name":null,"funder_award_id":"U2268203","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7926215319","display_name":null,"funder_award_id":"IVSTSKL-202327","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1536680647","https://openalex.org/W1989469598","https://openalex.org/W2061273563","https://openalex.org/W2124353687","https://openalex.org/W2586298574","https://openalex.org/W2773031116","https://openalex.org/W2775714787","https://openalex.org/W2792830341","https://openalex.org/W2798987894","https://openalex.org/W2811432216","https://openalex.org/W2919059038","https://openalex.org/W2963787388","https://openalex.org/W3031508018","https://openalex.org/W3046194589","https://openalex.org/W3114128942","https://openalex.org/W3118570274","https://openalex.org/W3126855404","https://openalex.org/W3159018159","https://openalex.org/W3198582484","https://openalex.org/W4200631567","https://openalex.org/W4220747472","https://openalex.org/W4220893768","https://openalex.org/W4283732315","https://openalex.org/W4304080867","https://openalex.org/W4307552296","https://openalex.org/W4313066135","https://openalex.org/W4313494359","https://openalex.org/W4313555022","https://openalex.org/W4321021766","https://openalex.org/W4377231435","https://openalex.org/W4379209584","https://openalex.org/W4383503983","https://openalex.org/W4386076325","https://openalex.org/W4386076504","https://openalex.org/W4386189887","https://openalex.org/W4386918784","https://openalex.org/W4387968982","https://openalex.org/W4392908033","https://openalex.org/W4393241074","https://openalex.org/W6756834165","https://openalex.org/W6845935626"],"related_works":["https://openalex.org/W2374614594","https://openalex.org/W2379392295","https://openalex.org/W3160965418","https://openalex.org/W613940353","https://openalex.org/W2391549584","https://openalex.org/W2320915480","https://openalex.org/W2737719445","https://openalex.org/W2362091980","https://openalex.org/W4292830139","https://openalex.org/W4319309705"],"abstract_inverted_index":{"The":[0,54,105],"Visible-Infrared":[1],"(VIS-IR)":[2],"object":[3,49,103,124,185,225],"detection":[4,8,50,83],"is":[5,35,94],"a":[6,95,109,234],"challenging":[7],"task,":[9],"which":[10,93],"combines":[11],"visible":[12,42,100,173],"and":[13,22,43,66,71,82,101,119,122,155,174,177,224,256,259],"infrared":[14,44,102,175],"data":[15],"to":[16,36,46,64,127,134,149,197,201,211,236],"give":[17],"information":[18,39,70,114,152,182,272],"on":[19,179,249],"the":[20,27,30,41,59,76,80,91,116,129,132,138,144,160,172,180,184,193,199,207,218,229,238,241,250],"category":[21],"location":[23],"of":[24,32,61,79,112,131,171,183,221,240],"objects":[25],"in":[26,40,74,115,206],"scene.":[28,209],"Therefore,":[29],"core":[31],"this":[33],"task":[34],"combine":[37,67],"complementary":[38,113,151],"modalities":[45,118,176],"provide":[47,108],"more":[48],"results":[51],"for":[52,99],"detection.":[53,104],"existing":[55],"methods":[56],"mainly":[57],"face":[58],"problem":[60],"insufficient":[62],"ability":[63,130,200],"perceive":[65,135],"visible-infrared":[68,117,139,208],"modal":[69,203],"have":[72],"difficulty":[73],"balancing":[75],"optimization":[77,219],"directions":[78,220],"fusion":[81,97,223],"tasks.":[84],"To":[85],"solve":[86],"these":[87],"problem,":[88],"we":[89,142,158,191,227],"propose":[90],"MMI-Det":[92],"multi-modal":[96,271],"method":[98,106,265],"can":[107,165,216],"good":[110],"combination":[111],"output":[120],"accurate":[121],"robust":[123],"information.":[125],"Specifically,":[126],"improve":[128,198,237],"model":[133,215],"environment":[136],"at":[137,186],"image":[140,222],"level,":[141],"designed":[143],"Contour":[145],"Enhancement":[146],"Module.":[147,163],"Furthermore,":[148],"extract":[150,166,202],"from":[153],"VIS":[154],"IR":[156],"modalities,":[157],"design":[159,192,228],"Fusion":[161],"Focus":[162],"It":[164],"different":[167,187],"frequency":[168],"spectral":[169],"features":[170,205],"focus":[178],"key":[181],"spatial":[188],"locations.":[189],"Moreover,":[190],"Contrast":[194],"Bridge":[195],"Module":[196,232],"invariant":[204],"Finally,":[210],"ensure":[212],"that":[213],"our":[214,264],"balance":[217],"detection,":[226],"Info":[230],"Guided":[231],"as":[233],"way":[235],"effectiveness":[239],"model\u2019s":[242],"training":[243],"optimization.":[244],"We":[245],"implement":[246],"extensive":[247],"experiments":[248],"public":[251],"FLIR,":[252],"M3FD,":[253],"LLVIP,":[254],"TNO":[255],"MSRS":[257],"datasets,":[258],"compared":[260],"with":[261,269],"previous":[262],"methods,":[263],"achieves":[266],"better":[267],"performance":[268],"powerful":[270],"perception":[273],"capabilities.":[274]},"counts_by_year":[{"year":2026,"cited_by_count":9},{"year":2025,"cited_by_count":21},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-10-10T00:00:00"}
