{"id":"https://openalex.org/W7147294198","doi":"https://doi.org/10.1109/cnml68938.2026.11452375","title":"Parameter-Efficient Federated Multimodal Alignment Tuning for Heterogeneous Clients","display_name":"Parameter-Efficient Federated Multimodal Alignment Tuning for Heterogeneous Clients","publication_year":2026,"publication_date":"2026-01-30","ids":{"openalex":"https://openalex.org/W7147294198","doi":"https://doi.org/10.1109/cnml68938.2026.11452375"},"language":null,"primary_location":{"id":"doi:10.1109/cnml68938.2026.11452375","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cnml68938.2026.11452375","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on Communication Networks and Machine Learning (CNML)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132689688","display_name":"Xu Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu Tan","raw_affiliation_strings":["Ceyear Technologies Co., Ltd,Qingdao,China"],"affiliations":[{"raw_affiliation_string":"Ceyear Technologies Co., Ltd,Qingdao,China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5132689688"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.94162456,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1167","last_page":"1171"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.29120001196861267,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.29120001196861267,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.17030000686645508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.10760000348091125,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.7143999934196472},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.555899977684021},{"id":"https://openalex.org/keywords/federated-learning","display_name":"Federated learning","score":0.4546000063419342},{"id":"https://openalex.org/keywords/multimodality","display_name":"Multimodality","score":0.42320001125335693},{"id":"https://openalex.org/keywords/raw-data","display_name":"Raw data","score":0.40630000829696655},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.3817000091075897},{"id":"https://openalex.org/keywords/information-exchange","display_name":"Information exchange","score":0.3546000123023987},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.3449999988079071}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8518000245094299},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.7143999934196472},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.555899977684021},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.531000018119812},{"id":"https://openalex.org/C2992525071","wikidata":"https://www.wikidata.org/wiki/Q50818671","display_name":"Federated learning","level":2,"score":0.4546000063419342},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.42320001125335693},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.40630000829696655},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3862000107765198},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.3817000091075897},{"id":"https://openalex.org/C189693848","wikidata":"https://www.wikidata.org/wiki/Q6031064","display_name":"Information exchange","level":2,"score":0.3546000123023987},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.3449999988079071},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3294999897480011},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3294000029563904},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.31940001249313354},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3025999963283539},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2989000082015991},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2930999994277954},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.289900004863739},{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.275299996137619},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.25929999351501465}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cnml68938.2026.11452375","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cnml68938.2026.11452375","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on Communication Networks and Machine Learning (CNML)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2185175083","https://openalex.org/W2952122856","https://openalex.org/W3204049089","https://openalex.org/W4312566746","https://openalex.org/W4390659072"],"related_works":[],"abstract_inverted_index":{"Large-scale":[0],"vision-language":[1],"models":[2],"like":[3],"CLIP":[4],"achieve":[5],"breakthrough":[6],"performance":[7],"in":[8,31,68,178],"zero-shot":[9],"classification":[10],"and":[11,82,89,142,184,191,204],"cross-modal":[12,47,162],"retrieval":[13],"by":[14,147,202],"learning":[15,36],"aligned":[16],"representations":[17],"from":[18,139],"massive":[19],"image-text":[20],"pairs.":[21],"However,":[22],"their":[23,176],"centralized":[24],"training":[25,39],"paradigm":[26],"faces":[27],"data":[28],"sharing":[29,41],"challenges":[30],"privacy-sensitive":[32],"domains.":[33],"Although":[34],"federated":[35,60,198],"enables":[37],"collaborative":[38],"without":[40],"raw":[42],"data,":[43],"it":[44],"encounters":[45],"the":[46,73,115,137,210],"alignment":[48,62,83,116,122],"challenge":[49],"with":[50],"heterogeneous":[51],"client":[52],"modalities.":[53],"To":[54,118],"address":[55],"this":[56,69],"challenge,":[57],"a":[58,124,155],"parameter-efficient":[59],"multimodal":[61,111,140,179,199],"tuning":[63],"framework":[64],"Fed-MoAlign":[65,188],"is":[66,75,87,130,165],"proposed":[67,131,214],"paper.":[70],"In":[71],"Fed-MoAlign,":[72],"model":[74],"decomposed":[76],"into":[77,96],"vision":[78],"encoder,":[79,81],"text":[80],"layer.":[84,117],"The":[85],"backbone":[86],"frozen":[88],"Low-Rank":[90],"Adaptation":[91],"(LoRA)":[92],"layers":[93,173],"are":[94,134,145],"inserted":[95],"encoders":[97],"based":[98,160],"on":[99,161,182],"pretrained":[100],"CLIP.":[101],"Unimodal":[102],"clients":[103,112,121,149],"train":[104],"corresponding":[105],"encoder":[106],"LoRA":[107],"parameters":[108],"independently,":[109],"while":[110],"additionally":[113],"update":[114],"enable":[119],"unimodal":[120,148],"training,":[123],"dynamic":[125],"prototype-based":[126],"pseudo-modality":[127],"generation":[128],"algorithm":[129],"where":[132],"prototypes":[133],"maintained":[135],"at":[136],"server":[138],"clients,":[141],"pseudo":[143],"features":[144],"generated":[146],"via":[150],"similarity-weighted":[151],"prototype":[152],"retrieval.":[153],"Furthermore,":[154],"hierarchical":[156],"selective":[157],"aggregation":[158],"mechanism":[159],"mutual":[163],"information":[164],"introduced,":[166],"designing":[167],"differentiated":[168],"strategies":[169],"for":[170],"different":[171],"network":[172],"according":[174],"to":[175],"roles":[177],"alignment.":[180],"Experiments":[181],"MS-COCO":[183],"Flickr30K":[185],"demonstrate":[186],"that":[187],"achieves":[189],"55.1%":[190],"35.6%":[192],"I2T":[193],"R@1":[194],"respectively,":[195],"outperforming":[196],"state-of-the-art":[197],"methods":[200],"CreamFL":[201],"3.0%":[203],"3.3%.":[205],"Ablation":[206],"studies":[207],"further":[208],"validate":[209],"effectiveness":[211],"of":[212],"our":[213],"components.":[215]},"counts_by_year":[],"updated_date":"2026-04-02T13:53:19.096889","created_date":"2026-04-02T00:00:00"}
