{"id":"https://openalex.org/W4409149494","doi":"https://doi.org/10.1007/s44267-025-00075-0","title":"MIRSAM: multimodal vision-language segment anything model for infrared small target detection","display_name":"MIRSAM: multimodal vision-language segment anything model for infrared small target detection","publication_year":2025,"publication_date":"2025-04-02","ids":{"openalex":"https://openalex.org/W4409149494","doi":"https://doi.org/10.1007/s44267-025-00075-0"},"language":"en","primary_location":{"id":"doi:10.1007/s44267-025-00075-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s44267-025-00075-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s44267-025-00075-0.pdf","source":{"id":"https://openalex.org/S4387289164","display_name":"Visual Intelligence","issn_l":"2731-9008","issn":["2731-9008"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Visual Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://link.springer.com/content/pdf/10.1007/s44267-025-00075-0.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101464703","display_name":"Mingjin Zhang","orcid":"https://orcid.org/0000-0002-1473-9784"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mingjin Zhang","raw_affiliation_strings":["Xidian University, Xi\u2019an, China"],"raw_orcid":"https://orcid.org/0000-0002-1473-9784","affiliations":[{"raw_affiliation_string":"Xidian University, Xi\u2019an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100461059","display_name":"Qian Xu","orcid":"https://orcid.org/0000-0001-5690-0583"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qian Xu","raw_affiliation_strings":["Xidian University, Xi\u2019an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Xidian University, Xi\u2019an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100774056","display_name":"Yuchun Wang","orcid":"https://orcid.org/0000-0003-2719-782X"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuchun Wang","raw_affiliation_strings":["Xidian University, Xi\u2019an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Xidian University, Xi\u2019an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xi Li","orcid":"https://orcid.org/0000-0001-6025-377X"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xi Li","raw_affiliation_strings":["Xidian University, Xi\u2019an, China"],"raw_orcid":"https://orcid.org/0000-0001-6025-377X","affiliations":[{"raw_affiliation_string":"Xidian University, Xi\u2019an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026608960","display_name":"Haojuan Yuan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210146919","display_name":"Shanghai Industrial Technology Institute","ror":"https://ror.org/03j1pdd39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210146919"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haojuan Yuan","raw_affiliation_strings":["Shanghai Aerospace Electronic Technology Institute, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Aerospace Electronic Technology Institute, Shanghai, China","institution_ids":["https://openalex.org/I4210146919"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101464703"],"corresponding_institution_ids":["https://openalex.org/I149594827"],"apc_list":null,"apc_paid":null,"fwci":20.3065,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.98849512,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"3","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12389","display_name":"Infrared Target Detection Methodologies","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12389","display_name":"Infrared Target Detection Methodologies","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14257","display_name":"Advanced Measurement and Detection Methods","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11856","display_name":"Thermography and Photoacoustic Techniques","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/2211","display_name":"Mechanics of Materials"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5179009437561035},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5082228183746338},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4723021984100342},{"id":"https://openalex.org/keywords/infrared","display_name":"Infrared","score":0.4128338694572449},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3257880210876465},{"id":"https://openalex.org/keywords/optics","display_name":"Optics","score":0.09921333193778992},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09277459979057312}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5179009437561035},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5082228183746338},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4723021984100342},{"id":"https://openalex.org/C158355884","wikidata":"https://www.wikidata.org/wiki/Q11388","display_name":"Infrared","level":2,"score":0.4128338694572449},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3257880210876465},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.09921333193778992},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09277459979057312}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s44267-025-00075-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s44267-025-00075-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s44267-025-00075-0.pdf","source":{"id":"https://openalex.org/S4387289164","display_name":"Visual Intelligence","issn_l":"2731-9008","issn":["2731-9008"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Visual Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:c349d530c5804f7abbd1c79a6c3c0ea5","is_oa":true,"landing_page_url":"https://doaj.org/article/c349d530c5804f7abbd1c79a6c3c0ea5","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Visual Intelligence, Vol 3, Iss 1, Pp 1-13 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s44267-025-00075-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s44267-025-00075-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s44267-025-00075-0.pdf","source":{"id":"https://openalex.org/S4387289164","display_name":"Visual Intelligence","issn_l":"2731-9008","issn":["2731-9008"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Visual Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1454346147","display_name":null,"funder_award_id":"Grant 62272363","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1713521428","display_name":null,"funder_award_id":"2024KFKT001-1","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3221460277","display_name":null,"funder_award_id":"92470108","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6386153180","display_name":null,"funder_award_id":"2021QNRC001","funder_id":"https://openalex.org/F4320311778","funder_display_name":"China Association for Science and Technology"},{"id":"https://openalex.org/G6665774762","display_name":null,"funder_award_id":"2021QNRC001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6976320768","display_name":null,"funder_award_id":"62272363","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320311778","display_name":"China Association for Science and Technology","ror":"https://ror.org/035vmht26"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409149494.pdf","grobid_xml":"https://content.openalex.org/works/W4409149494.grobid-xml"},"referenced_works_count":56,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W1978993121","https://openalex.org/W2006851788","https://openalex.org/W2041560658","https://openalex.org/W2407220925","https://openalex.org/W2541371665","https://openalex.org/W2544366167","https://openalex.org/W2604768956","https://openalex.org/W2780449407","https://openalex.org/W2790021535","https://openalex.org/W2900678331","https://openalex.org/W2912083425","https://openalex.org/W2912919760","https://openalex.org/W2914304175","https://openalex.org/W2914578055","https://openalex.org/W2972003492","https://openalex.org/W3010079414","https://openalex.org/W3039443125","https://openalex.org/W3048644861","https://openalex.org/W3088317060","https://openalex.org/W3089723045","https://openalex.org/W3094502228","https://openalex.org/W3107500664","https://openalex.org/W3112389165","https://openalex.org/W3118934234","https://openalex.org/W3135367836","https://openalex.org/W3171950886","https://openalex.org/W4200169950","https://openalex.org/W4200252498","https://openalex.org/W4200394859","https://openalex.org/W4206367471","https://openalex.org/W4206913762","https://openalex.org/W4285804308","https://openalex.org/W4292337114","https://openalex.org/W4304084055","https://openalex.org/W4313065862","https://openalex.org/W4313506322","https://openalex.org/W4362500629","https://openalex.org/W4362655579","https://openalex.org/W4364385338","https://openalex.org/W4368755247","https://openalex.org/W4382401167","https://openalex.org/W4382567565","https://openalex.org/W4384111859","https://openalex.org/W4387801948","https://openalex.org/W4387947074","https://openalex.org/W4391109864","https://openalex.org/W4391679787","https://openalex.org/W4392489527","https://openalex.org/W4392903290","https://openalex.org/W4394709642","https://openalex.org/W4405022676","https://openalex.org/W6739901393","https://openalex.org/W6793968381","https://openalex.org/W6851578965","https://openalex.org/W6853702739"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Abstract":[0],"Infrared":[1,109],"small":[2,47],"target":[3],"detection":[4],"(IRSTD)":[5],"plays":[6],"a":[7,31,149],"crucial":[8],"role":[9],"in":[10,43,56,82,91,125,131],"applications":[11],"such":[12],"as":[13,200],"traffic":[14],"monitoring":[15],"systems":[16],"and":[17,41,48,54,73,133,138,167],"maritime":[18],"rescue.":[19],"However,":[20],"existing":[21],"IRSTD":[22,124,216],"methods":[23,60],"face":[24],"challenges":[25],"due":[26],"to":[27,39,88,116,172,190,202],"their":[28],"reliance":[29],"on":[30,64,223],"single":[32],"type":[33],"of":[34,185],"data,":[35],"making":[36],"them":[37],"susceptible":[38],"noise":[40,132,163],"deficient":[42],"contextual":[44],"understanding.":[45],"Additionally,":[46],"limited":[49],"datasets":[50],"hinder":[51],"model":[52,145],"generalization":[53],"performance":[55],"complex":[57],"scenarios.":[58],"Previous":[59],"are":[61,68],"mostly":[62],"based":[63],"U-Net":[65],"architectures":[66],"that":[67,230],"optimized":[69],"for":[70,108,123],"small-scale":[71],"data":[72],"involve":[74],"intricate":[75],"design.":[76],"These":[77],"designs":[78],"often":[79],"perform":[80],"well":[81],"specific":[83],"scenarios,":[84],"but":[85],"they":[86],"struggle":[87],"generalize":[89],"effectively":[90],"real-world":[92],"applications.":[93],"Inspired":[94],"by":[95,147],"leading":[96],"vision-language":[97],"models,":[98],"we":[99,141,180,211],"propose":[100],"an":[101],"MIRSAM":[102,233],"(Multimodal":[103],"Vision-Language":[104],"Segment":[105],"Anything":[106],"Model":[107],"Small":[110],"Target":[111],"Detection),":[112],"the":[113,129,173,182,207,213,224,231],"first":[114,214],"framework":[115],"integrate":[117],"text":[118,183,192],"modality":[119,122],"with":[120],"image":[121,158],"this":[126,160],"article.":[127],"Given":[128],"differences":[130],"structural":[134],"information":[135],"between":[136],"infrared":[137,174],"natural":[139],"images,":[140],"fine-tune":[142],"segment":[143],"anything":[144],"(SAM)":[146],"designing":[148],"contourlet":[150],"denoising":[151],"adapter":[152],"module":[153,161],"(CDAM).":[154],"Integrated":[155],"into":[156,193],"SAM\u2019s":[157],"encoder,":[159],"suppresses":[162],"during":[164],"feature":[165,195],"extraction":[166],"encoding,":[168],"enabling":[169],"efficient":[170],"adaptation":[171],"domain.":[175],"To":[176],"incorporate":[177],"textual":[178],"information,":[179],"utilize":[181],"encoder":[184],"contrastive":[186],"language-image":[187],"pre-training":[188],"(CLIP)":[189],"convert":[191],"high-dimensional":[194],"vectors,":[196],"which":[197],"then":[198],"serve":[199],"prompts":[201],"extract":[203],"relevant":[204],"details":[205],"from":[206],"features.":[208],"In":[209],"addition,":[210],"build":[212],"multimodal":[215],"dataset,":[217],"IR-TXPair,":[218],"containing":[219],"image-text":[220],"pairs.":[221],"Experiments":[222],"newly":[225],"constructed":[226],"IR-TXPair":[227],"dataset":[228],"demonstrate":[229],"proposed":[232],"outperforms":[234],"state-of-the-art":[235],"methods.":[236]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3}],"updated_date":"2026-06-02T09:04:35.204637","created_date":"2025-10-10T00:00:00"}
