{"id":"https://openalex.org/W7154563822","doi":"https://doi.org/10.48550/arxiv.2604.13561","title":"CLIP Architecture for Abdominal CT Image-Text Alignment and Zero-Shot Learning: Investigating Batch Composition and Data Scaling","display_name":"CLIP Architecture for Abdominal CT Image-Text Alignment and Zero-Shot Learning: Investigating Batch Composition and Data Scaling","publication_year":2026,"publication_date":"2026-04-15","ids":{"openalex":"https://openalex.org/W7154563822","doi":"https://doi.org/10.48550/arxiv.2604.13561"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.13561","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.13561","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.13561","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133764370","display_name":"Shivika","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Shivika","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121492347","display_name":"Kartik Bose","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bose, Kartik","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5013272134","display_name":"Pankaj Gupta","orcid":"https://orcid.org/0000-0003-3914-3757"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gupta, Pankaj","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5133764370"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.4943999946117401,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.4943999946117401,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.15029999613761902,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0658000037074089,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.5964999794960022},{"id":"https://openalex.org/keywords/macro","display_name":"Macro","score":0.538100004196167},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.527899980545044},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.4729999899864197},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4262000024318695},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3882000148296356},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3508000075817108}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6258999705314636},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.5964999794960022},{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.538100004196167},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.527899980545044},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.4729999899864197},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4713999927043915},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4262000024318695},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3882000148296356},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3508000075817108},{"id":"https://openalex.org/C31601959","wikidata":"https://www.wikidata.org/wiki/Q931309","display_name":"Medical imaging","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.3068000078201294},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.30550000071525574},{"id":"https://openalex.org/C40231798","wikidata":"https://www.wikidata.org/wiki/Q1333743","display_name":"Composition (language)","level":2,"score":0.3028999865055084},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2840000092983246},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26489999890327454},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2581999897956848},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.25189998745918274},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.13561","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.13561","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.13561","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.13561","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4472716152667999}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-language":[0],"models":[1],"trained":[2],"with":[3,45,105,126,141,185],"contrastive":[4],"learning":[5],"on":[6,24,89,121,153],"paired":[7],"medical":[8,31,208],"images":[9],"and":[10,83,129],"reports":[11,47],"show":[12],"strong":[13],"zero-shot":[14,54],"diagnostic":[15],"capabilities,":[16],"yet":[17],"the":[18,74,90,97,108,132,154,178,201],"effect":[19],"of":[20,57,69,131,169,181],"training":[21,78,125],"batch":[22,203],"composition":[23],"learned":[25],"representations":[26],"remains":[27],"unexplored":[28],"for":[29],"3D":[30,41,207],"imaging.":[32],"We":[33,64],"reproduce":[34],"Merlin,":[35],"a":[36,53,122],"dual-encoder":[37],"model":[38],"that":[39,163,177],"aligns":[40],"abdominal":[42],"CT":[43],"volumes":[44],"radiology":[46],"using":[48,85],"symmetric":[49],"InfoNCE":[50],"loss,":[51],"achieving":[52,107],"macro":[55],"F1":[56],"74.45%":[58],"across":[59],"30":[60],"findings":[61,143],"(original:":[62],"73.00%).":[63],"then":[65],"investigate":[66],"two":[67],"axes":[68],"variation.":[70],"First,":[71],"we":[72,116],"control":[73],"normal-to-abnormal":[75],"ratio":[76],"within":[77],"batches":[79],"at":[80,200],"25:75,":[81],"50:50,":[82],"75:25":[84,106],"section-level":[86],"balanced":[87,113,151],"sampling":[88,152],"full":[91],"dataset.":[92],"All":[93],"three":[94],"configurations":[95],"underperform":[96],"unbalanced":[98],"baseline":[99],"by":[100,206],"2.4":[101],"to":[102,139,160],"2.8":[103],"points,":[104],"best":[109],"result":[110],"(72.02%)":[111],"among":[112],"variants.":[114],"Second,":[115],"conduct":[117],"data":[118,147],"scaling":[119],"ablations":[120],"4,362-study":[123],"subset,":[124],"20%,":[127],"40%,":[128],"100%":[130],"data.":[133],"Performance":[134],"scales":[135],"sub-linearly":[136],"from":[137],"65.26%":[138],"71.88%,":[140],"individual":[142],"varying":[144],"dramatically":[145],"in":[146],"sensitivity.":[148],"Enforcing":[149],"50:50":[150],"same":[155],"subset":[156],"further":[157],"degrades":[158],"performance":[159],"68.01%,":[161],"confirming":[162],"explicit":[164],"class":[165,198],"balancing":[166,172],"hurts":[167],"regardless":[168],"dataset":[170],"or":[171],"granularity.":[173],"Our":[174],"results":[175],"indicate":[176],"stochastic":[179],"diversity":[180],"random":[182],"sampling,":[183],"combined":[184],"Merlin's":[186],"alternating":[187],"batching":[188],"over":[189],"anatomical":[190],"subsections,":[191],"provides":[192],"more":[193],"effective":[194],"regularization":[195],"than":[196],"engineered":[197],"ratios":[199],"small":[202],"sizes":[204],"required":[205],"volumes.":[209]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-04-17T00:00:00"}
