{"id":"https://openalex.org/W7124167275","doi":"https://doi.org/10.48550/arxiv.2601.08619","title":"CtrlFuse: Mask-Prompt Guided Controllable Infrared and Visible Image Fusion","display_name":"CtrlFuse: Mask-Prompt Guided Controllable Infrared and Visible Image Fusion","publication_year":2026,"publication_date":"2026-01-12","ids":{"openalex":"https://openalex.org/W7124167275","doi":"https://doi.org/10.48550/arxiv.2601.08619"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.08619","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.08619","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.08619","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123038476","display_name":"Yiming Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sun, Yiming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122994696","display_name":"Yuan Ruan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruan, Yuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123044388","display_name":"Qinghua Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Qinghua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5123016384","display_name":"Pengfei Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Pengfei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5123038476"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.9225000143051147,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11659","display_name":"Advanced Image Fusion Techniques","score":0.9225000143051147,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.04520000144839287,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.008700000122189522,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-fusion","display_name":"Image fusion","score":0.7149999737739563},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.7080000042915344},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6621999740600586},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6215000152587891},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5982999801635742},{"id":"https://openalex.org/keywords/controllability","display_name":"Controllability","score":0.5687000155448914},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5321000218391418},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4959000051021576}],"concepts":[{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.7149999737739563},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.7080000042915344},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7060999870300293},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.676800012588501},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6621999740600586},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6620000004768372},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6215000152587891},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5982999801635742},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.5687000155448914},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5321000218391418},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4959000051021576},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.4571000039577484},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4490000009536743},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.38100001215934753},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.36880001425743103},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.3271999955177307},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2833000123500824},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.08619","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.08619","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.08619","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.08619","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Infrared":[0],"and":[1,79,117,128,139],"visible":[2],"image":[3,55],"fusion":[4,26,56,62,82,109,118,129,137],"generates":[5],"all-weather":[6],"perception-capable":[7],"images":[8],"by":[9,64,92],"combining":[10],"complementary":[11],"modalities,":[12],"enhancing":[13],"environmental":[14],"awareness":[15],"for":[16],"intelligent":[17],"unmanned":[18],"systems.":[19],"Existing":[20],"methods":[21],"either":[22],"focus":[23],"on":[24],"pixel-level":[25],"while":[27,101],"overlooking":[28],"downstream":[29],"task":[30,126,145],"adaptability":[31],"or":[32],"implicitly":[33],"learn":[34],"rigid":[35],"semantics":[36,107],"through":[37],"cascaded":[38],"detection/segmentation":[39],"models,":[40],"unable":[41],"to":[42],"interactively":[43],"address":[44],"diverse":[45],"semantic":[46,90],"target":[47],"perception":[48],"needs.":[49],"We":[50],"propose":[51],"CtrlFuse,":[52],"a":[53,70,74,80],"controllable":[54],"framework":[57],"that":[58],"enables":[59],"interactive":[60],"dynamic":[61],"guided":[63],"mask":[65,99],"prompts.":[66],"The":[67,85],"model":[68],"integrates":[69],"multi-modal":[71],"feature":[72],"extractor,":[73],"reference":[75],"prompt":[76],"encoder":[77],"(RPE),":[78],"prompt-semantic":[81],"module":[83],"(PSFM).":[84],"RPE":[86],"dynamically":[87],"encodes":[88],"task-specific":[89],"prompts":[91],"fine-tuning":[93],"pre-trained":[94],"segmentation":[95,116,140,151],"models":[96],"with":[97,142],"input":[98],"guidance,":[100],"the":[102,143,149],"PSFM":[103],"explicitly":[104],"injects":[105],"these":[106],"into":[108],"features.":[110],"Through":[111],"synergistic":[112],"optimization":[113],"of":[114],"parallel":[115],"branches,":[119],"our":[120],"method":[121],"achieves":[122],"mutual":[123],"enhancement":[124],"between":[125],"performance":[127],"quality.":[130],"Experiments":[131],"demonstrate":[132],"state-of-the-art":[133],"results":[134],"in":[135],"both":[136],"controllability":[138],"accuracy,":[141],"adapted":[144],"branch":[146],"even":[147],"outperforming":[148],"original":[150],"model.":[152]},"counts_by_year":[],"updated_date":"2026-01-15T23:21:31.212559","created_date":"2026-01-15T00:00:00"}
