{"id":"https://openalex.org/W4407450062","doi":"https://doi.org/10.1109/jbhi.2025.3532311","title":"Improving Foundation Model for Endoscopy Video Analysis via Representation Learning on Long Sequences","display_name":"Improving Foundation Model for Endoscopy Video Analysis via Representation Learning on Long Sequences","publication_year":2025,"publication_date":"2025-02-13","ids":{"openalex":"https://openalex.org/W4407450062","doi":"https://doi.org/10.1109/jbhi.2025.3532311","pmid":"https://pubmed.ncbi.nlm.nih.gov/40031835"},"language":"en","primary_location":{"id":"doi:10.1109/jbhi.2025.3532311","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jbhi.2025.3532311","pdf_url":null,"source":{"id":"https://openalex.org/S2495854775","display_name":"IEEE Journal of Biomedical and Health Informatics","issn_l":"2168-2194","issn":["2168-2194","2168-2208"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Biomedical and Health Informatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102969023","display_name":"Zhao Wang","orcid":"https://orcid.org/0000-0003-1860-8391"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Zhao Wang","raw_affiliation_strings":["Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100785985","display_name":"Chang Liu","orcid":"https://orcid.org/0000-0002-0219-959X"},"institutions":[{"id":"https://openalex.org/I4210128910","display_name":"Group Sense (China)","ror":"https://ror.org/036wd5777","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chang Liu","raw_affiliation_strings":["SenseTime Research, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"SenseTime Research, Shanghai, China","institution_ids":["https://openalex.org/I4210128910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092000475","display_name":"Lingting Zhu","orcid":"https://orcid.org/0000-0002-1478-3232"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Lingting Zhu","raw_affiliation_strings":["Department of Statistics and Actuarial Science, The University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Statistics and Actuarial Science, The University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111778811","display_name":"Tongtong Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Tongtong Wang","raw_affiliation_strings":["Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066553616","display_name":"Shaoting Zhang","orcid":"https://orcid.org/0000-0002-8719-448X"},"institutions":[{"id":"https://openalex.org/I4210128910","display_name":"Group Sense (China)","ror":"https://ror.org/036wd5777","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaoting Zhang","raw_affiliation_strings":["SenseTime Research, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"SenseTime Research, Shanghai, China","institution_ids":["https://openalex.org/I4210128910"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090516040","display_name":"Qi Dou","orcid":"https://orcid.org/0000-0002-3416-9950"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Qi Dou","raw_affiliation_strings":["Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102969023"],"corresponding_institution_ids":["https://openalex.org/I177725633"],"apc_list":null,"apc_paid":null,"fwci":4.185,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.93584727,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"29","issue":"5","first_page":"3526","last_page":"3536"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10552","display_name":"Colorectal Cancer Screening and Detection","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/2730","display_name":"Oncology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10552","display_name":"Colorectal Cancer Screening and Detection","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/2730","display_name":"Oncology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11378","display_name":"Gastrointestinal Bleeding Diagnosis and Treatment","score":0.948199987411499,"subfield":{"id":"https://openalex.org/subfields/2715","display_name":"Gastroenterology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10696","display_name":"Gastric Cancer Management and Outcomes","score":0.9266999959945679,"subfield":{"id":"https://openalex.org/subfields/2740","display_name":"Pulmonary and Respiratory Medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7853760719299316},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.699615478515625},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6682050228118896},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5298761129379272},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46511274576187134},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.43999478220939636},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.433206707239151},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.42574673891067505},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3192983865737915},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.15606316924095154}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7853760719299316},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.699615478515625},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6682050228118896},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5298761129379272},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46511274576187134},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.43999478220939636},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.433206707239151},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.42574673891067505},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3192983865737915},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.15606316924095154},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D004724","descriptor_name":"Endoscopy","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D004724","descriptor_name":"Endoscopy","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D004724","descriptor_name":"Endoscopy","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007091","descriptor_name":"Image Processing, Computer-Assisted","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D007091","descriptor_name":"Image Processing, Computer-Assisted","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D007091","descriptor_name":"Image Processing, Computer-Assisted","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D014741","descriptor_name":"Video Recording","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D014741","descriptor_name":"Video Recording","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D014741","descriptor_name":"Video Recording","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.1109/jbhi.2025.3532311","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jbhi.2025.3532311","pdf_url":null,"source":{"id":"https://openalex.org/S2495854775","display_name":"IEEE Journal of Biomedical and Health Informatics","issn_l":"2168-2194","issn":["2168-2194","2168-2208"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Biomedical and Health Informatics","raw_type":"journal-article"},{"id":"pmid:40031835","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40031835","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE journal of biomedical and health informatics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3654096354","display_name":null,"funder_award_id":"A-CUHK402/23","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G818396220","display_name":null,"funder_award_id":"62322318","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320320883","display_name":"Agence Nationale de la Recherche","ror":"https://ror.org/00rbzpz17"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":65,"referenced_works":["https://openalex.org/W2008359794","https://openalex.org/W2064675550","https://openalex.org/W2266464013","https://openalex.org/W2332757643","https://openalex.org/W2777273430","https://openalex.org/W2896457183","https://openalex.org/W2962936819","https://openalex.org/W2980225217","https://openalex.org/W2996012842","https://openalex.org/W3035524453","https://openalex.org/W3046240927","https://openalex.org/W3082604781","https://openalex.org/W3094502228","https://openalex.org/W3104061658","https://openalex.org/W3126721948","https://openalex.org/W3145385912","https://openalex.org/W3159481202","https://openalex.org/W3164631742","https://openalex.org/W3170837227","https://openalex.org/W3176780013","https://openalex.org/W3195814479","https://openalex.org/W3201942032","https://openalex.org/W3203507611","https://openalex.org/W3214993537","https://openalex.org/W3216270236","https://openalex.org/W4221148496","https://openalex.org/W4226428874","https://openalex.org/W4286951705","https://openalex.org/W4308456711","https://openalex.org/W4312472954","https://openalex.org/W4312804044","https://openalex.org/W4312891522","https://openalex.org/W4312933868","https://openalex.org/W4312956471","https://openalex.org/W4313007634","https://openalex.org/W4313156423","https://openalex.org/W4377000137","https://openalex.org/W4386071576","https://openalex.org/W4387225560","https://openalex.org/W4387225742","https://openalex.org/W4390872862","https://openalex.org/W4390874575","https://openalex.org/W4393170801","https://openalex.org/W4400881081","https://openalex.org/W4401852105","https://openalex.org/W4403150635","https://openalex.org/W4404575065","https://openalex.org/W4404784276","https://openalex.org/W4406482303","https://openalex.org/W6620707391","https://openalex.org/W6726497184","https://openalex.org/W6757817989","https://openalex.org/W6758474948","https://openalex.org/W6766978945","https://openalex.org/W6780226713","https://openalex.org/W6791353385","https://openalex.org/W6796761347","https://openalex.org/W6846313647","https://openalex.org/W6853469104","https://openalex.org/W6853660671","https://openalex.org/W6858865347","https://openalex.org/W6860732774","https://openalex.org/W6863339742","https://openalex.org/W6864842516","https://openalex.org/W6868993806"],"related_works":["https://openalex.org/W1981780420","https://openalex.org/W2182707996","https://openalex.org/W45233828","https://openalex.org/W2964988449","https://openalex.org/W3125011624","https://openalex.org/W2397952901","https://openalex.org/W2029380707","https://openalex.org/W188202134","https://openalex.org/W1508631387","https://openalex.org/W2370917603"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2],"endoscopy":[3,35,70,109,137],"video":[4,14,71,90,100,110,116],"analysis":[5],"have":[6],"relied":[7],"on":[8,133],"the":[9,31,148],"utilization":[10],"of":[11,22,34,136],"relatively":[12],"short":[13],"clips":[15],"extracted":[16],"from":[17,98],"longer":[18,119],"videos":[19],"or":[20,150],"millions":[21],"individual":[23],"frames.":[24,128],"However,":[25],"these":[26],"approaches":[27,166],"tend":[28],"to":[29],"neglect":[30],"domain-specific":[32],"characteristics":[33],"data,":[36],"which":[37,92],"is":[38,93,131],"typically":[39],"presented":[40],"as":[41,147],"a":[42,59,64,78,85,106,168],"long":[43,69,99,108,114],"stream":[44],"containing":[45],"valuable":[46],"semantic":[47],"spatial":[48],"and":[49,123,143,164,179,186,189,197,204],"temporal":[50,151],"information.":[51],"To":[52,73],"address":[53],"this":[54],"limitation,":[55],"we":[56,76,104],"propose":[57,77],"EndoFM-LV,":[58],"foundation":[60],"model":[61],"developed":[62],"under":[63],"minute-level":[65],"pre-training":[66],"framework":[67,87,159],"upon":[68],"sequences.":[72,101],"be":[74],"specific,":[75],"novel":[79],"masked":[80],"token":[81],"modeling":[82],"scheme":[83],"within":[84],"teacher-student":[86],"for":[88,95,182,200],"self-supervised":[89],"pre-training,":[91,103],"tailored":[94],"learning":[96],"representations":[97],"For":[102],"construct":[105],"large-scale":[107],"dataset":[111],"comprising":[112],"6,469":[113],"endoscopic":[115],"samples,":[117],"each":[118],"than":[120],"1":[121],"minute":[122],"totaling":[124],"over":[125],"13":[126],"million":[127],"Our":[129],"EndoFM-LV":[130],"evaluated":[132],"four":[134],"types":[135],"tasks,":[138],"namely":[139],"classification,":[140,183,201],"segmentation,":[141,184,202],"detection,":[142,185,203],"workflow":[144,187,205],"recognition,":[145],"serving":[146],"backbone":[149],"module.":[152],"Extensive":[153],"experimental":[154],"results":[155],"demonstrate":[156],"that":[157],"our":[158],"outperforms":[160],"previous":[161],"state-of-the-art":[162],"video-based":[163],"frame-based":[165],"by":[167],"significant":[169],"margin,":[170],"surpassing":[171],"Endo-FM":[172],"(5.6%":[173],"F1,":[174,178,192],"9.3%":[175,195],"Dice,":[176,194],"8.4%":[177],"3.3%":[180],"accuracy":[181,199],"recognition)":[188],"EndoSSL":[190],"(5.0%":[191],"8.1%":[193],"F1":[196],"3.1%":[198],"recognition).":[206]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
