{"id":"https://openalex.org/W7124171612","doi":"https://doi.org/10.1145/3777577.3777673","title":"Hybrid Transformer-CNN for Multi-Modal Whole-Heart Segmentation with MAE Pretraining","display_name":"Hybrid Transformer-CNN for Multi-Modal Whole-Heart Segmentation with MAE Pretraining","publication_year":2025,"publication_date":"2025-10-24","ids":{"openalex":"https://openalex.org/W7124171612","doi":"https://doi.org/10.1145/3777577.3777673"},"language":null,"primary_location":{"id":"doi:10.1145/3777577.3777673","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3777577.3777673","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 6th International Symposium on Artificial Intelligence for Medical Sciences","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3777577.3777673","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080085928","display_name":"Xiaowei Jie","orcid":null},"institutions":[{"id":"https://openalex.org/I80947539","display_name":"Fuzhou University","ror":"https://ror.org/011xvna82","country_code":"CN","type":"education","lineage":["https://openalex.org/I80947539"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaowei Jie","raw_affiliation_strings":["Fuzhou University, Fuzhou, Fujian, China"],"affiliations":[{"raw_affiliation_string":"Fuzhou University, Fuzhou, Fujian, China","institution_ids":["https://openalex.org/I80947539"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100709936","display_name":"Jianzhong Chen","orcid":"https://orcid.org/0000-0002-8232-7298"},"institutions":[{"id":"https://openalex.org/I80947539","display_name":"Fuzhou University","ror":"https://ror.org/011xvna82","country_code":"CN","type":"education","lineage":["https://openalex.org/I80947539"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Chen","raw_affiliation_strings":["Fuzhou University, Fuzhou, Fujian, China"],"affiliations":[{"raw_affiliation_string":"Fuzhou University, Fuzhou, Fujian, China","institution_ids":["https://openalex.org/I80947539"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123011626","display_name":"Zejing Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I80947539","display_name":"Fuzhou University","ror":"https://ror.org/011xvna82","country_code":"CN","type":"education","lineage":["https://openalex.org/I80947539"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zejing Lin","raw_affiliation_strings":["Fuzhou University, Fuzhou, Fujian, China"],"affiliations":[{"raw_affiliation_string":"Fuzhou University, Fuzhou, Fujian, China","institution_ids":["https://openalex.org/I80947539"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5080085928"],"corresponding_institution_ids":["https://openalex.org/I80947539"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.70389152,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"587","last_page":"592"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.6789000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.6789000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.053300000727176666,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.04100000113248825,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7886000275611877},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6021000146865845},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5633000135421753},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5490999817848206},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.474700003862381},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.43869999051094055},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4207000136375427},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.40869998931884766}],"concepts":[{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7886000275611877},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.732699990272522},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7073000073432922},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6021000146865845},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5633000135421753},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5490999817848206},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.474700003862381},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.43869999051094055},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4207000136375427},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.40869998931884766},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3783999979496002},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.3765000104904175},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.37229999899864197},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3059999942779541},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.3034000098705292},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.29510000348091125},{"id":"https://openalex.org/C25694479","wikidata":"https://www.wikidata.org/wiki/Q7446278","display_name":"Segmentation-based object categorization","level":5,"score":0.2930999994277954},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.2554999887943268}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3777577.3777673","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3777577.3777673","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 6th International Symposium on Artificial Intelligence for Medical Sciences","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3777577.3777673","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3777577.3777673","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 6th International Symposium on Artificial Intelligence for Medical Sciences","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2301358467","https://openalex.org/W3006040295","https://openalex.org/W3138516171","https://openalex.org/W3185864054","https://openalex.org/W4212875960","https://openalex.org/W4313156423","https://openalex.org/W4319300504","https://openalex.org/W4364352185"],"related_works":[],"abstract_inverted_index":{"Accurately":[0],"segmenting":[1],"the":[2,93,104,115,167],"entire":[3],"heart":[4],"from":[5],"multi-modal,":[6],"multi-center":[7,97,179],"three-dimensional":[8],"cardiac":[9,83,142],"images":[10,84],"is":[11],"of":[12],"critical":[13],"importance":[14],"for":[15,41,103,118,125],"clinical":[16],"diagnosis":[17],"and":[18,29,31,88,99,173],"treatment":[19],"planning.":[20],"However,":[21],"domain":[22],"differences":[23],"between":[24],"imaging":[25],"modalities":[26],"(e.g.,":[27],"CT":[28,98],"MRI)":[30],"distribution":[32],"inconsistencies":[33],"across":[34,96,140],"data":[35],"acquisition":[36],"centers":[37],"pose":[38],"significant":[39],"challenges":[40],"developing":[42],"robust":[43],"deep":[44],"learning":[45],"models":[46],"with":[47,67,122],"strong":[48,174],"generalization":[49,95,175],"capabilities.":[50],"To":[51],"address":[52],"these":[53],"issues,":[54],"we":[55,74,108,144],"propose":[56],"a":[57,68,110],"novel":[58],"multi-modal":[59],"whole-heart":[60],"segmentation":[61,71,106,138,180],"framework":[62,169],"that":[63,113,166],"integrates":[64],"MAE":[65,80],"pretraining":[66,78],"hybrid":[69],"Transformer-CNN":[70],"network.":[72],"Specifically,":[73],"first":[75],"perform":[76],"self-supervised":[77],"using":[79],"on":[81,159],"unlabeled":[82],"to":[85,136],"learn":[86],"modality-invariant":[87],"domain-agnostic":[89],"representations,":[90],"thereby":[91],"enhancing":[92],"model's":[94],"MRI":[100],"data.":[101],"Subsequently,":[102],"downstream":[105],"task,":[107],"construct":[109],"unified":[111],"architecture":[112],"combines":[114],"Swin":[116],"Transformer":[117],"modeling":[119],"long-range":[120],"dependencies":[121],"convolutional":[123],"layers":[124],"preserving":[126],"local":[127],"spatial":[128],"details,":[129],"enabling":[130],"efficient":[131],"multi-scale":[132],"information":[133],"fusion.":[134],"Furthermore,":[135],"improve":[137],"performance":[139,172],"multiple":[141],"structures,":[143],"design":[145],"an":[146],"Enhanced":[147],"Feature":[148],"Self-Attention":[149],"(E-FSA)":[150],"module,":[151],"which":[152],"dynamically":[153],"enhances":[154],"class-specific":[155],"feature":[156],"representations":[157],"based":[158],"inter-class":[160],"semantic":[161],"relationships.":[162],"Experimental":[163],"results":[164],"demonstrate":[165],"proposed":[168],"achieves":[170],"superior":[171],"in":[176],"challenging":[177],"cross-modal,":[178],"scenarios.":[181]},"counts_by_year":[],"updated_date":"2026-02-23T20:09:44.859080","created_date":"2026-01-15T00:00:00"}
