{"id":"https://openalex.org/W4405934112","doi":"https://doi.org/10.1109/tcsvt.2024.3524645","title":"MSCoTDet: Language-Driven Multi-Modal Fusion for Improved Multispectral Pedestrian Detection","display_name":"MSCoTDet: Language-Driven Multi-Modal Fusion for Improved Multispectral Pedestrian Detection","publication_year":2024,"publication_date":"2024-12-31","ids":{"openalex":"https://openalex.org/W4405934112","doi":"https://doi.org/10.1109/tcsvt.2024.3524645"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3524645","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3524645","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Taeheon Kim","orcid":"https://orcid.org/0000-0001-6334-3025"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Taeheon Kim","raw_affiliation_strings":["Integrated Vision and Language Laboratory, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Yuseong, Daejeon, Republic of Korea","Integrated Vision and Language Lab, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), 291, Daehak-ro, Yuseong-gu, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0001-6334-3025","affiliations":[{"raw_affiliation_string":"Integrated Vision and Language Laboratory, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Yuseong, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Integrated Vision and Language Lab, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), 291, Daehak-ro, Yuseong-gu, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113258484","display_name":"Sangyun Chung","orcid":"https://orcid.org/0009-0004-5397-2687"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sangyun Chung","raw_affiliation_strings":["Integrated Vision and Language Laboratory, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Yuseong, Daejeon, Republic of Korea","Integrated Vision and Language Lab, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), 291, Daehak-ro, Yuseong-gu, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0004-5397-2687","affiliations":[{"raw_affiliation_string":"Integrated Vision and Language Laboratory, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Yuseong, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Integrated Vision and Language Lab, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), 291, Daehak-ro, Yuseong-gu, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112976315","display_name":"Damin Yeom","orcid":null},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Damin Yeom","raw_affiliation_strings":["Integrated Vision and Language Laboratory, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Yuseong, Daejeon, Republic of Korea","Integrated Vision and Language Lab, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), 291, Daehak-ro, Yuseong-gu, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0006-8959-9057","affiliations":[{"raw_affiliation_string":"Integrated Vision and Language Laboratory, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Yuseong, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Integrated Vision and Language Lab, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), 291, Daehak-ro, Yuseong-gu, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067319464","display_name":"Youngjoon Yu","orcid":"https://orcid.org/0000-0002-3188-2080"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Youngjoon Yu","raw_affiliation_strings":["Integrated Vision and Language Laboratory, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Yuseong, Daejeon, Republic of Korea","Integrated Vision and Language Lab, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), 291, Daehak-ro, Yuseong-gu, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0002-3188-2080","affiliations":[{"raw_affiliation_string":"Integrated Vision and Language Laboratory, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Yuseong, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Integrated Vision and Language Lab, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), 291, Daehak-ro, Yuseong-gu, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089050147","display_name":"Hak Gu Kim","orcid":"https://orcid.org/0000-0003-2137-934X"},"institutions":[{"id":"https://openalex.org/I67900169","display_name":"Chung-Ang University","ror":"https://ror.org/01r024a98","country_code":"KR","type":"education","lineage":["https://openalex.org/I67900169"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hak Gu Kim","raw_affiliation_strings":["Department of Image Science and Arts, GSAIM, Chung-Ang University, Seoul, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0003-2137-934X","affiliations":[{"raw_affiliation_string":"Department of Image Science and Arts, GSAIM, Chung-Ang University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I67900169"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038798134","display_name":"Yong Man Ro","orcid":"https://orcid.org/0000-0001-5306-6853"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yong Man Ro","raw_affiliation_strings":["Integrated Vision and Language Laboratory, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Yuseong, Daejeon, Republic of Korea","Integrated Vision and Language Lab, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), 291, Daehak-ro, Yuseong-gu, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0001-5306-6853","affiliations":[{"raw_affiliation_string":"Integrated Vision and Language Laboratory, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Yuseong, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"Integrated Vision and Language Lab, School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), 291, Daehak-ro, Yuseong-gu, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7498,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.86568255,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"35","issue":"5","first_page":"5006","last_page":"5021"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10689","display_name":"Remote-Sensing Image Classification","score":0.9650999903678894,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13282","display_name":"Automated Road and Building Extraction","score":0.9638000130653381,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multispectral-image","display_name":"Multispectral image","score":0.8135713338851929},{"id":"https://openalex.org/keywords/pedestrian-detection","display_name":"Pedestrian detection","score":0.778736412525177},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7063912153244019},{"id":"https://openalex.org/keywords/pedestrian","display_name":"Pedestrian","score":0.5951665639877319},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5887438654899597},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5245816111564636},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.49979662895202637},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.4417995512485504},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3257565498352051},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12363746762275696},{"id":"https://openalex.org/keywords/transport-engineering","display_name":"Transport engineering","score":0.08178430795669556}],"concepts":[{"id":"https://openalex.org/C173163844","wikidata":"https://www.wikidata.org/wiki/Q1761440","display_name":"Multispectral image","level":2,"score":0.8135713338851929},{"id":"https://openalex.org/C2780156472","wikidata":"https://www.wikidata.org/wiki/Q2355550","display_name":"Pedestrian detection","level":3,"score":0.778736412525177},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7063912153244019},{"id":"https://openalex.org/C2777113093","wikidata":"https://www.wikidata.org/wiki/Q221488","display_name":"Pedestrian","level":2,"score":0.5951665639877319},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5887438654899597},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5245816111564636},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.49979662895202637},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.4417995512485504},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3257565498352051},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12363746762275696},{"id":"https://openalex.org/C22212356","wikidata":"https://www.wikidata.org/wiki/Q775325","display_name":"Transport engineering","level":1,"score":0.08178430795669556},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3524645","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3524645","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":91,"referenced_works":["https://openalex.org/W22229905","https://openalex.org/W1536680647","https://openalex.org/W1861492603","https://openalex.org/W1910108985","https://openalex.org/W1989085630","https://openalex.org/W2031454541","https://openalex.org/W2034996255","https://openalex.org/W2194775991","https://openalex.org/W2315907656","https://openalex.org/W2344132218","https://openalex.org/W2415234561","https://openalex.org/W2428342532","https://openalex.org/W2470413457","https://openalex.org/W2608295741","https://openalex.org/W2917092108","https://openalex.org/W2929607865","https://openalex.org/W2963188557","https://openalex.org/W2963579094","https://openalex.org/W2963644680","https://openalex.org/W2964027659","https://openalex.org/W2984009799","https://openalex.org/W2987131085","https://openalex.org/W2990501368","https://openalex.org/W3015352266","https://openalex.org/W3016916515","https://openalex.org/W3033116542","https://openalex.org/W3035145964","https://openalex.org/W3035333188","https://openalex.org/W3035517717","https://openalex.org/W3036931590","https://openalex.org/W3044256341","https://openalex.org/W3085812513","https://openalex.org/W3087642760","https://openalex.org/W3092676209","https://openalex.org/W3096609285","https://openalex.org/W3099884329","https://openalex.org/W3100801259","https://openalex.org/W3104788521","https://openalex.org/W3106754126","https://openalex.org/W3110525903","https://openalex.org/W3116967329","https://openalex.org/W3158128549","https://openalex.org/W3173018607","https://openalex.org/W3175786839","https://openalex.org/W3177934633","https://openalex.org/W3207919963","https://openalex.org/W4221143046","https://openalex.org/W4224936528","https://openalex.org/W4285107355","https://openalex.org/W4304092061","https://openalex.org/W4307965990","https://openalex.org/W4308760226","https://openalex.org/W4312706437","https://openalex.org/W4313007055","https://openalex.org/W4323655724","https://openalex.org/W4380303606","https://openalex.org/W4382468787","https://openalex.org/W4384161798","https://openalex.org/W4385734246","https://openalex.org/W4386038403","https://openalex.org/W4386065359","https://openalex.org/W4386065569","https://openalex.org/W4386075969","https://openalex.org/W4386189887","https://openalex.org/W4387789807","https://openalex.org/W4389667009","https://openalex.org/W4390873750","https://openalex.org/W4393371809","https://openalex.org/W4395664898","https://openalex.org/W4402299909","https://openalex.org/W4402713111","https://openalex.org/W4402753999","https://openalex.org/W4404780828","https://openalex.org/W6631190155","https://openalex.org/W6753836424","https://openalex.org/W6756834165","https://openalex.org/W6757817989","https://openalex.org/W6778883912","https://openalex.org/W6784094891","https://openalex.org/W6784500203","https://openalex.org/W6810081322","https://openalex.org/W6810220367","https://openalex.org/W6810738896","https://openalex.org/W6845935626","https://openalex.org/W6850625674","https://openalex.org/W6851592950","https://openalex.org/W6853562202","https://openalex.org/W6860041859","https://openalex.org/W6860809563","https://openalex.org/W6864354119","https://openalex.org/W6873942125"],"related_works":["https://openalex.org/W4318664220","https://openalex.org/W2771047279","https://openalex.org/W4388409104","https://openalex.org/W2124951708","https://openalex.org/W3132270449","https://openalex.org/W4377289091","https://openalex.org/W2972620127","https://openalex.org/W3013647784","https://openalex.org/W2981141433","https://openalex.org/W2997281059"],"abstract_inverted_index":{"Multispectral":[0,67,86],"pedestrian":[1,55,79,97,125,140],"detection":[2,56,120,126],"is":[3],"attractive":[4],"for":[5],"around-the-clock":[6],"applications":[7],"due":[8,33],"to":[9,23,34,49,76],"the":[10,35,74,113,119],"complementary":[11],"information":[12],"between":[13],"RGB":[14],"and":[15,137],"thermal":[16],"modalities.":[17],"However,":[18],"current":[19],"models":[20],"often":[21],"fail":[22],"detect":[24],"pedestrians":[25],"in":[26,53],"certain":[27],"cases":[28],"(e.g.,":[29],"thermal-obscured":[30],"pedestrians),":[31],"particularly":[32],"modality":[36,51,135],"bias":[37,52],"learned":[38],"from":[39],"statistically":[40],"biased":[41],"datasets.":[42],"In":[43],"this":[44,100],"paper,":[45],"we":[46,64,82,102],"investigate":[47],"how":[48],"mitigate":[50],"multispectral":[54,78,96,124,139],"using":[57],"a":[58,66,84,104],"Large":[59],"Language":[60],"Model":[61],"(LLM).":[62],"Accordingly,":[63],"design":[65,103],"Chain-of-Thought":[68,87],"(MSCoT)":[69],"prompting":[70,94,117],"strategy,":[71],"which":[72],"prompts":[73],"LLM":[75],"perform":[77],"detection.":[80,98,141],"Moreover,":[81],"propose":[83],"novel":[85],"Detection":[88],"(MSCoTDet)":[89],"framework":[90],"that":[91,110,131],"integrates":[92],"MSCoT":[93,116],"into":[95],"To":[99],"end,":[101],"Language-driven":[105],"Multi-modal":[106],"Fusion":[107],"(LMF)":[108],"strategy":[109],"enables":[111],"fusing":[112],"outputs":[114],"of":[115,122],"with":[118],"results":[121],"vision-based":[123],"models.":[127],"Extensive":[128],"experiments":[129],"validate":[130],"MSCoTDet":[132],"effectively":[133],"mitigates":[134],"biases":[136],"improves":[138]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-01-01T00:00:00"}
