{"id":"https://openalex.org/W3202229673","doi":"https://doi.org/10.1145/3474085.3475418","title":"Multi-Modal Multi-Instance Learning for Retinal Disease Recognition","display_name":"Multi-Modal Multi-Instance Learning for Retinal Disease Recognition","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3202229673","doi":"https://doi.org/10.1145/3474085.3475418","mag":"3202229673"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475418","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475418","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2109.12307","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060270456","display_name":"Xirong Li","orcid":"https://orcid.org/0000-0002-0220-8310"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xirong Li","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103820171","display_name":"Yang Zhou","orcid":"https://orcid.org/0009-0009-3685-4909"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang Zhou","raw_affiliation_strings":["Beijing Visionary Intelligence Ltd., Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Visionary Intelligence Ltd., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072667046","display_name":"Jie Wang","orcid":"https://orcid.org/0000-0001-5500-2060"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Wang","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030948218","display_name":"Hailan Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hailan Lin","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043064399","display_name":"Jianchun Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianchun Zhao","raw_affiliation_strings":["Beijing Visionary Intelligence Ltd., Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Visionary Intelligence Ltd., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119929807","display_name":"Dayong Ding","orcid":"https://orcid.org/0000-0001-9331-6677"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dayong Ding","raw_affiliation_strings":["Beijing Visionary Intelligence Ltd., Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Visionary Intelligence Ltd., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100303401","display_name":"Weihong Yu","orcid":"https://orcid.org/0000-0002-9758-9493"},"institutions":[{"id":"https://openalex.org/I200296433","display_name":"Chinese Academy of Medical Sciences & Peking Union Medical College","ror":"https://ror.org/02drdmm93","country_code":"CN","type":"education","lineage":["https://openalex.org/I200296433"]},{"id":"https://openalex.org/I2801228662","display_name":"Peking Union Medical College Hospital","ror":"https://ror.org/04jztag35","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I2801228662"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihong Yu","raw_affiliation_strings":["Peking Union Medical College Hospital, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Peking Union Medical College Hospital, Beijing, China","institution_ids":["https://openalex.org/I2801228662","https://openalex.org/I200296433"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000122982","display_name":"Youxin Chen","orcid":"https://orcid.org/0000-0002-7231-5058"},"institutions":[{"id":"https://openalex.org/I200296433","display_name":"Chinese Academy of Medical Sciences & Peking Union Medical College","ror":"https://ror.org/02drdmm93","country_code":"CN","type":"education","lineage":["https://openalex.org/I200296433"]},{"id":"https://openalex.org/I2801228662","display_name":"Peking Union Medical College Hospital","ror":"https://ror.org/04jztag35","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I2801228662"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Youxin Chen","raw_affiliation_strings":["Peking Union Medical College Hospital, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Peking Union Medical College Hospital, Beijing, China","institution_ids":["https://openalex.org/I2801228662","https://openalex.org/I200296433"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.3027,"has_fulltext":false,"cited_by_count":50,"citation_normalized_percentile":{"value":0.94887101,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2474","last_page":"2482"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11438","display_name":"Retinal Imaging and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11438","display_name":"Retinal Imaging and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12599","display_name":"Retinal and Optic Conditions","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/2731","display_name":"Ophthalmology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12874","display_name":"Digital Imaging for Blood Diseases","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7814830541610718},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.7570433616638184},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7112468481063843},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.6417440176010132},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5939982533454895},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5686532258987427},{"id":"https://openalex.org/keywords/fundus","display_name":"Fundus (uterus)","score":0.48431769013404846},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4725479781627655},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.43130043148994446},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4220121204853058},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3825720548629761},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.13464757800102234},{"id":"https://openalex.org/keywords/radiology","display_name":"Radiology","score":0.09545630216598511}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7814830541610718},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.7570433616638184},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7112468481063843},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.6417440176010132},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5939982533454895},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5686532258987427},{"id":"https://openalex.org/C2776391266","wikidata":"https://www.wikidata.org/wiki/Q9612","display_name":"Fundus (uterus)","level":2,"score":0.48431769013404846},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4725479781627655},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43130043148994446},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4220121204853058},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3825720548629761},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.13464757800102234},{"id":"https://openalex.org/C126838900","wikidata":"https://www.wikidata.org/wiki/Q77604","display_name":"Radiology","level":1,"score":0.09545630216598511},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3474085.3475418","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475418","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2109.12307","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2109.12307","pdf_url":"https://arxiv.org/pdf/2109.12307","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2109.12307","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2109.12307","pdf_url":"https://arxiv.org/pdf/2109.12307","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2087461551","https://openalex.org/W2194775991","https://openalex.org/W2557738935","https://openalex.org/W2589074029","https://openalex.org/W2598442119","https://openalex.org/W2758333670","https://openalex.org/W2785934082","https://openalex.org/W2788633781","https://openalex.org/W2886281300","https://openalex.org/W2962934715","https://openalex.org/W2963403868","https://openalex.org/W2963524571","https://openalex.org/W2963587345","https://openalex.org/W2964069537","https://openalex.org/W2970971581","https://openalex.org/W2979644159","https://openalex.org/W2996993160","https://openalex.org/W3034924496","https://openalex.org/W3035588244","https://openalex.org/W3048939150","https://openalex.org/W3093023876","https://openalex.org/W3099638501","https://openalex.org/W3128049850","https://openalex.org/W3149332725","https://openalex.org/W3160648140","https://openalex.org/W3176196997","https://openalex.org/W4295312788","https://openalex.org/W4385245566","https://openalex.org/W4394666973"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W2167701463","https://openalex.org/W2110287964","https://openalex.org/W4307407935","https://openalex.org/W649759291"],"abstract_inverted_index":{"This":[0],"paper":[1,130],"attacks":[2],"an":[3,23,31,162],"emerging":[4],"challenge":[5],"of":[6,16,25,55,64,100,165,174,184,196,204,222,229,235],"multi-modal":[7,13,102,122,224],"retinal":[8,107],"disease":[9,108,115],"recognition.":[10],"Given":[11],"a":[12,17,38,97,113,118,171,179,218],"case":[14],"consisting":[15,221],"color":[18],"fundus":[19],"photo":[20],"(CFP)":[21],"and":[22,57,68,77,139,200],"array":[24],"OCT":[26,58,140],"B-scan":[27],"images":[28],"acquired":[29],"during":[30],"eye":[32],"examination,":[33],"we":[34,167],"aim":[35],"to":[36,90,147,169,211],"build":[37],"deep":[39],"neural":[40],"network":[41,88],"that":[42],"recognizes":[43],"multiple":[44],"vision-threatening":[45],"diseases":[46],"for":[47,94,135,155],"the":[48,52,61,84,87,194,197,205,212,233,236],"given":[49,180],"case.":[50],"As":[51],"diagnostic":[53],"efficacy":[54],"CFP":[56,138,198,206],"is":[59,70],"disease-dependent,":[60],"network's":[62],"ability":[63],"being":[65],"both":[66,74],"selective":[67],"interpretable":[69],"important.":[71],"Moreover,":[72],"as":[73],"data":[75],"acquisition":[76],"manual":[78],"labeling":[79],"are":[80],"extremely":[81],"expensive":[82],"in":[83,128],"medical":[85],"domain,":[86],"has":[89],"be":[91],"relatively":[92,158],"lightweight":[93,143],"learning":[95,156],"from":[96,157,226],"limited":[98],"set":[99],"labeled":[101],"samples.":[103],"Prior":[104],"art":[105],"on":[106,112,117,217],"recognition":[109],"focuses":[110],"either":[111],"single":[114,119],"or":[116],"modality,":[120],"leaving":[121],"fusion":[123],"largely":[124],"underexplored.":[125],"We":[126],"propose":[127,168],"this":[129,185],"Multi-Modal":[131],"Multi-Instance":[132],"Learning":[133],"(MM-MIL)":[134],"selectively":[136],"fusing":[137],"modalities.":[141],"Its":[142],"architecture":[144],"(as":[145],"compared":[146],"current":[148],"multi-head":[149],"attention":[150],"modules)":[151],"makes":[152],"it":[153],"suited":[154],"small-sized":[159],"datasets.":[160],"For":[161],"effective":[163],"use":[164],"MM-MIL,":[166],"generate":[170],"pseudo":[172],"sequence":[173],"CFPs":[175],"by":[176],"over":[177],"sampling":[178],"CFP.":[181],"The":[182],"benefits":[183],"tactic":[186],"include":[187],"well":[188],"balancing":[189],"instances":[190],"across":[191],"modalities,":[192],"increasing":[193],"resolution":[195],"input,":[199],"finding":[201],"out":[202],"regions":[203],"most":[207],"relevant":[208],"with":[209],"respect":[210],"final":[213],"diagnosis.":[214],"Extensive":[215],"experiments":[216],"real-world":[219],"dataset":[220],"1,206":[223],"cases":[225],"1,193":[227],"eyes":[228],"836":[230],"subjects":[231],"demonstrate":[232],"viability":[234],"proposed":[237],"model.":[238]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":17},{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":7}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
