{"id":"https://openalex.org/W4318147820","doi":"https://doi.org/10.1109/bigdata55660.2022.10020569","title":"ClinicalLayoutLM: A Pre-trained Multi-modal Model for Understanding Scanned Document in Electronic Health Records","display_name":"ClinicalLayoutLM: A Pre-trained Multi-modal Model for Understanding Scanned Document in Electronic Health Records","publication_year":2022,"publication_date":"2022-12-17","ids":{"openalex":"https://openalex.org/W4318147820","doi":"https://doi.org/10.1109/bigdata55660.2022.10020569"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata55660.2022.10020569","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata55660.2022.10020569","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020094404","display_name":"Qiang Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I919571938","display_name":"The University of Texas Health Science Center at Houston","ror":"https://ror.org/03gds6c39","country_code":"US","type":"education","lineage":["https://openalex.org/I919571938"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Qiang Wei","raw_affiliation_strings":["The University of Texas Health Science Center at Houston,School of Biomedical Informatics,Houston,USA","School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas Health Science Center at Houston,School of Biomedical Informatics,Houston,USA","institution_ids":["https://openalex.org/I919571938"]},{"raw_affiliation_string":"School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, USA","institution_ids":["https://openalex.org/I919571938"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030584533","display_name":"Xu Zuo","orcid":"https://orcid.org/0009-0008-8571-5529"},"institutions":[{"id":"https://openalex.org/I919571938","display_name":"The University of Texas Health Science Center at Houston","ror":"https://ror.org/03gds6c39","country_code":"US","type":"education","lineage":["https://openalex.org/I919571938"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xu Zuo","raw_affiliation_strings":["The University of Texas Health Science Center at Houston,School of Biomedical Informatics,Houston,USA","School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas Health Science Center at Houston,School of Biomedical Informatics,Houston,USA","institution_ids":["https://openalex.org/I919571938"]},{"raw_affiliation_string":"School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, USA","institution_ids":["https://openalex.org/I919571938"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004590467","display_name":"Omer Anjum","orcid":"https://orcid.org/0009-0006-9930-6596"},"institutions":[{"id":"https://openalex.org/I919571938","display_name":"The University of Texas Health Science Center at Houston","ror":"https://ror.org/03gds6c39","country_code":"US","type":"education","lineage":["https://openalex.org/I919571938"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Omer Anjum","raw_affiliation_strings":["The University of Texas Health Science Center at Houston,School of Biomedical Informatics,Houston,USA","School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas Health Science Center at Houston,School of Biomedical Informatics,Houston,USA","institution_ids":["https://openalex.org/I919571938"]},{"raw_affiliation_string":"School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, USA","institution_ids":["https://openalex.org/I919571938"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100715220","display_name":"Yan Hu","orcid":"https://orcid.org/0009-0008-2413-5918"},"institutions":[{"id":"https://openalex.org/I919571938","display_name":"The University of Texas Health Science Center at Houston","ror":"https://ror.org/03gds6c39","country_code":"US","type":"education","lineage":["https://openalex.org/I919571938"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yan Hu","raw_affiliation_strings":["The University of Texas Health Science Center at Houston,School of Biomedical Informatics,Houston,USA","School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas Health Science Center at Houston,School of Biomedical Informatics,Houston,USA","institution_ids":["https://openalex.org/I919571938"]},{"raw_affiliation_string":"School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, USA","institution_ids":["https://openalex.org/I919571938"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075321787","display_name":"Ryan Denlinger","orcid":"https://orcid.org/0000-0001-9074-3702"},"institutions":[{"id":"https://openalex.org/I919571938","display_name":"The University of Texas Health Science Center at Houston","ror":"https://ror.org/03gds6c39","country_code":"US","type":"education","lineage":["https://openalex.org/I919571938"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ryan Denlinger","raw_affiliation_strings":["The University of Texas Health Science Center at Houston,School of Biomedical Informatics,Houston,USA","School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas Health Science Center at Houston,School of Biomedical Informatics,Houston,USA","institution_ids":["https://openalex.org/I919571938"]},{"raw_affiliation_string":"School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, USA","institution_ids":["https://openalex.org/I919571938"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075275684","display_name":"Elmer V. Bernstam","orcid":"https://orcid.org/0000-0001-7643-791X"},"institutions":[{"id":"https://openalex.org/I4210159031","display_name":"The University of Texas Health Science Center","ror":"https://ror.org/05cwbxa29","country_code":"US","type":"education","lineage":["https://openalex.org/I4210159031"]},{"id":"https://openalex.org/I919571938","display_name":"The University of Texas Health Science Center at Houston","ror":"https://ror.org/03gds6c39","country_code":"US","type":"education","lineage":["https://openalex.org/I919571938"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Elmer V. Bernstam","raw_affiliation_strings":["The University of Texas Health Science Center at Houston,School of Biomedical Informatics Division of General Internal Medicine, McGovern Medical School,Houston,USA","School of Biomedical Informatics Division of General Internal Medicine, McGovern Medical School, The University of Texas Health Science Center at Houston, Houston, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas Health Science Center at Houston,School of Biomedical Informatics Division of General Internal Medicine, McGovern Medical School,Houston,USA","institution_ids":["https://openalex.org/I4210159031"]},{"raw_affiliation_string":"School of Biomedical Informatics Division of General Internal Medicine, McGovern Medical School, The University of Texas Health Science Center at Houston, Houston, USA","institution_ids":["https://openalex.org/I919571938"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064643788","display_name":"Martin J. Citardi","orcid":"https://orcid.org/0000-0002-1155-9235"},"institutions":[{"id":"https://openalex.org/I919571938","display_name":"The University of Texas Health Science Center at Houston","ror":"https://ror.org/03gds6c39","country_code":"US","type":"education","lineage":["https://openalex.org/I919571938"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Martin J Citardi","raw_affiliation_strings":["The University of Texas Health Science Center at Houston,McGovern Medical School,Department of Otorhinolaryngology-Head &#x0026; Neck Surgery,Houston,USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas Health Science Center at Houston,McGovern Medical School,Department of Otorhinolaryngology-Head &#x0026; Neck Surgery,Houston,USA","institution_ids":["https://openalex.org/I919571938"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084072550","display_name":"Hua Xu","orcid":"https://orcid.org/0000-0002-5274-4672"},"institutions":[{"id":"https://openalex.org/I919571938","display_name":"The University of Texas Health Science Center at Houston","ror":"https://ror.org/03gds6c39","country_code":"US","type":"education","lineage":["https://openalex.org/I919571938"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hua Xu","raw_affiliation_strings":["The University of Texas Health Science Center at Houston,School of Biomedical Informatics,Houston,USA","School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas Health Science Center at Houston,School of Biomedical Informatics,Houston,USA","institution_ids":["https://openalex.org/I919571938"]},{"raw_affiliation_string":"School of Biomedical Informatics, The University of Texas Health Science Center at Houston, Houston, USA","institution_ids":["https://openalex.org/I919571938"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5020094404"],"corresponding_institution_ids":["https://openalex.org/I919571938"],"apc_list":null,"apc_paid":null,"fwci":0.239,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.60936835,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2821","last_page":"2827"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9814000129699707,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9678000211715698,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8345280885696411},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6755833625793457},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5859280824661255},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5805113315582275},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5686876177787781},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5666488409042358},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5525467991828918},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.5516760945320129},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5177770853042603},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4518454074859619},{"id":"https://openalex.org/keywords/health-records","display_name":"Health records","score":0.4318188726902008},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.4133458137512207},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33178478479385376},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.2961101233959198}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8345280885696411},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6755833625793457},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5859280824661255},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5805113315582275},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5686876177787781},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5666488409042358},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5525467991828918},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.5516760945320129},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5177770853042603},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4518454074859619},{"id":"https://openalex.org/C3019952477","wikidata":"https://www.wikidata.org/wiki/Q1324077","display_name":"Health records","level":3,"score":0.4318188726902008},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.4133458137512207},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33178478479385376},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2961101233959198},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata55660.2022.10020569","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata55660.2022.10020569","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7900000214576721}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W2912300176","https://openalex.org/W2954499361","https://openalex.org/W2962772269","https://openalex.org/W2963716420","https://openalex.org/W2986619406","https://openalex.org/W2997154779","https://openalex.org/W3003273206","https://openalex.org/W3003484198","https://openalex.org/W3011411500","https://openalex.org/W3013022628","https://openalex.org/W3027067038","https://openalex.org/W3034864438","https://openalex.org/W3035089734","https://openalex.org/W3046375318","https://openalex.org/W3092930902","https://openalex.org/W3093218477","https://openalex.org/W3104953317","https://openalex.org/W3159672047","https://openalex.org/W3163650427","https://openalex.org/W3170863103","https://openalex.org/W3176016422","https://openalex.org/W3176664887","https://openalex.org/W3176851559","https://openalex.org/W3182680257","https://openalex.org/W3196343399","https://openalex.org/W3200439183","https://openalex.org/W3202839357","https://openalex.org/W3204562006","https://openalex.org/W3208292737","https://openalex.org/W4224921511","https://openalex.org/W4226020328","https://openalex.org/W4283034476","https://openalex.org/W4287657829","https://openalex.org/W4304013646","https://openalex.org/W4304014014","https://openalex.org/W4312233877","https://openalex.org/W6783817627","https://openalex.org/W6796761347","https://openalex.org/W6809958762","https://openalex.org/W6839200446"],"related_works":["https://openalex.org/W3107474891","https://openalex.org/W2081647779","https://openalex.org/W2368651715","https://openalex.org/W3185852197","https://openalex.org/W1788528807","https://openalex.org/W1975174578","https://openalex.org/W104581431","https://openalex.org/W2725657302","https://openalex.org/W2990109640","https://openalex.org/W4318147820"],"abstract_inverted_index":{"Scanned":[0],"documents":[1,23,73,114,151],"(e.g.,":[2,77],"faxes)":[3],"are":[4,12],"still":[5],"widely":[6],"used":[7],"in":[8,14,21,24],"clinical":[9,29,72,85,113,150,159],"practice":[10],"and":[11,31,66,80,152,171,180],"prevalent":[13],"Electronic":[15],"Health":[16],"Records":[17],"(EHR).":[18],"Unlocking":[19],"information":[20,46,68,169],"scanned":[22,71,90,112],"EHRs":[25],"is":[26,35,140,176,184],"critical":[27],"for":[28,149],"operation":[30],"research.":[32],"However,":[33],"it":[34,38,155],"challenging":[36],"as":[37,166],"requires":[39],"converting":[40],"images":[41],"to":[42,69,109],"texts":[43],"before":[44],"applying":[45],"extraction":[47,170],"technologies.":[48],"Here":[49],"we":[50,92,153],"propose":[51],"a":[52,84,99],"multi-modal":[53,100,147],"approach":[54],"(ClinicalLayoutLM)":[55],"that":[56,131],"jointly":[57],"models":[58,148],"text":[59,135],"extracted":[60],"from":[61,102,136],"Optical":[62],"Character":[63],"Recognition":[64],"(OCR)":[65],"layout/image":[67],"classify":[70,110],"into":[74,115],"different":[75],"categories":[76],"lab":[78],"reports":[79],"CT":[81],"scans).":[82],"Using":[83],"corpus":[86],"of":[87,123,143,146],"348,":[88],"311":[89],"documents,":[91],"continually":[93],"pretrained":[94],"ClinicalLayoutLM":[95,118,139],"based":[96,133],"on":[97,134],"LayoutLMv3,":[98],"model":[101,129,183],"the":[103,107,111,127,141,181],"open":[104],"domain.":[105],"For":[106],"task":[108],"16":[116],"categories,":[117],"achieved":[119],"an":[120],"F1":[121],"score":[122],"0.9051,":[124],"which":[125],"outperformed":[126],"baseline":[128],"(0.8840)":[130],"was":[132],"OCR":[137],"only.":[138],"first":[142],"its":[144],"kind":[145],"believe":[154],"could":[156],"benefit":[157],"other":[158],"natural":[160],"language":[161],"processing":[162],"(NLP)":[163],"tasks":[164],"such":[165],"layout":[167],"analysis,":[168],"so":[172],"on.":[173],"The":[174],"code":[175],"available":[177,185],"at":[178],"https://github.com/UTHealth-CCB/ClinicalLayoutLM":[179],"pre-trained":[182],"upon":[186],"request.":[187]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
