{"id":"https://openalex.org/W4205656166","doi":"https://doi.org/10.3233/shti210848","title":"An Evaluation of Pretrained BERT Models for Comparing Semantic Similarity Across Unstructured Clinical Trial Texts","display_name":"An Evaluation of Pretrained BERT Models for Comparing Semantic Similarity Across Unstructured Clinical Trial Texts","publication_year":2022,"publication_date":"2022-01-14","ids":{"openalex":"https://openalex.org/W4205656166","doi":"https://doi.org/10.3233/shti210848","pmid":"https://pubmed.ncbi.nlm.nih.gov/35062081"},"language":"en","primary_location":{"id":"doi:10.3233/shti210848","is_oa":true,"landing_page_url":"https://doi.org/10.3233/shti210848","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/SHTI210848","source":{"id":"https://openalex.org/S4210179765","display_name":"Studies in health technology and informatics","issn_l":"0926-9630","issn":["0926-9630","1879-8365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in Health Technology and Informatics","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/SHTI210848","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jessica Patricoski","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]},{"id":"https://openalex.org/I2799853436","display_name":"Johns Hopkins Medicine","ror":"https://ror.org/037zgn354","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2799853436"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jessica Patricoski","raw_affiliation_strings":["Biomedical Informatics and Data Science Section, Johns Hopkins University School of Medicine, Baltimore, MD"],"affiliations":[{"raw_affiliation_string":"Biomedical Informatics and Data Science Section, Johns Hopkins University School of Medicine, Baltimore, MD","institution_ids":["https://openalex.org/I2799853436","https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Johns Hopkins Molecular Tumor Board Investigators","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Johns Hopkins Molecular Tumor Board Investigators","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Kory Kreimeyer","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164401","display_name":"Sidney Kimmel Comprehensive Cancer Center","ror":"https://ror.org/05m5b8x20","country_code":"US","type":"facility","lineage":["https://openalex.org/I2799853436","https://openalex.org/I4210164401"]},{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kory Kreimeyer","raw_affiliation_strings":["Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine, Baltimore, MD"],"affiliations":[{"raw_affiliation_string":"Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine, Baltimore, MD","institution_ids":["https://openalex.org/I4210164401","https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Archana Balan","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]},{"id":"https://openalex.org/I4210164401","display_name":"Sidney Kimmel Comprehensive Cancer Center","ror":"https://ror.org/05m5b8x20","country_code":"US","type":"facility","lineage":["https://openalex.org/I2799853436","https://openalex.org/I4210164401"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Archana Balan","raw_affiliation_strings":["Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine, Baltimore, MD"],"affiliations":[{"raw_affiliation_string":"Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine, Baltimore, MD","institution_ids":["https://openalex.org/I4210164401","https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kent Hardart","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]},{"id":"https://openalex.org/I4210164401","display_name":"Sidney Kimmel Comprehensive Cancer Center","ror":"https://ror.org/05m5b8x20","country_code":"US","type":"facility","lineage":["https://openalex.org/I2799853436","https://openalex.org/I4210164401"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kent Hardart","raw_affiliation_strings":["Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine, Baltimore, MD"],"affiliations":[{"raw_affiliation_string":"Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine, Baltimore, MD","institution_ids":["https://openalex.org/I4210164401","https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jessica Tao","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]},{"id":"https://openalex.org/I4210164401","display_name":"Sidney Kimmel Comprehensive Cancer Center","ror":"https://ror.org/05m5b8x20","country_code":"US","type":"facility","lineage":["https://openalex.org/I2799853436","https://openalex.org/I4210164401"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jessica Tao","raw_affiliation_strings":["Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine, Baltimore, MD"],"affiliations":[{"raw_affiliation_string":"Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine, Baltimore, MD","institution_ids":["https://openalex.org/I4210164401","https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Valsamo Anagnostou","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164401","display_name":"Sidney Kimmel Comprehensive Cancer Center","ror":"https://ror.org/05m5b8x20","country_code":"US","type":"facility","lineage":["https://openalex.org/I2799853436","https://openalex.org/I4210164401"]},{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Valsamo Anagnostou","raw_affiliation_strings":["Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine, Baltimore, MD"],"affiliations":[{"raw_affiliation_string":"Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine, Baltimore, MD","institution_ids":["https://openalex.org/I4210164401","https://openalex.org/I145311948"]}]},{"author_position":"last","author":{"id":null,"display_name":"Taxiarchis Botsis","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]},{"id":"https://openalex.org/I2799853436","display_name":"Johns Hopkins Medicine","ror":"https://ror.org/037zgn354","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2799853436"]},{"id":"https://openalex.org/I4210164401","display_name":"Sidney Kimmel Comprehensive Cancer Center","ror":"https://ror.org/05m5b8x20","country_code":"US","type":"facility","lineage":["https://openalex.org/I2799853436","https://openalex.org/I4210164401"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Taxiarchis Botsis","raw_affiliation_strings":["Biomedical Informatics and Data Science Section, Johns Hopkins University School of Medicine, Baltimore, MD","Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine, Baltimore, MD"],"affiliations":[{"raw_affiliation_string":"Biomedical Informatics and Data Science Section, Johns Hopkins University School of Medicine, Baltimore, MD","institution_ids":["https://openalex.org/I2799853436","https://openalex.org/I145311948"]},{"raw_affiliation_string":"Sidney Kimmel Comprehensive Cancer Center, Johns Hopkins University School of Medicine, Baltimore, MD","institution_ids":["https://openalex.org/I4210164401","https://openalex.org/I145311948"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I145311948","https://openalex.org/I2799853436"],"apc_list":null,"apc_paid":null,"fwci":2.0572,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.88027047,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"289","issue":null,"first_page":"18","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.476500004529953,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.476500004529953,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.22220000624656677,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.1298999935388565,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6136000156402588},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5742999911308289},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5608999729156494},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5529999732971191},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5184999704360962}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7698000073432922},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7041000127792358},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6136000156402588},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5795999765396118},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5742999911308289},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5608999729156494},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5529999732971191},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5184999704360962},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.4814999997615814},{"id":"https://openalex.org/C535046627","wikidata":"https://www.wikidata.org/wiki/Q30612","display_name":"Clinical trial","level":2,"score":0.44119998812675476},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.39100000262260437},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3131999969482422},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2581999897956848}],"mesh":[{"descriptor_ui":"D002986","descriptor_name":"Clinical Trials as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D002986","descriptor_name":"Clinical Trials as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D002986","descriptor_name":"Clinical Trials as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007802","descriptor_name":"Language","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007802","descriptor_name":"Language","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007802","descriptor_name":"Language","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012660","descriptor_name":"Semantics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012660","descriptor_name":"Semantics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012660","descriptor_name":"Semantics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.3233/shti210848","is_oa":true,"landing_page_url":"https://doi.org/10.3233/shti210848","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/SHTI210848","source":{"id":"https://openalex.org/S4210179765","display_name":"Studies in health technology and informatics","issn_l":"0926-9630","issn":["0926-9630","1879-8365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in Health Technology and Informatics","raw_type":"book-chapter"},{"id":"pmid:35062081","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35062081","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in health technology and informatics","raw_type":null}],"best_oa_location":{"id":"doi:10.3233/shti210848","is_oa":true,"landing_page_url":"https://doi.org/10.3233/shti210848","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/SHTI210848","source":{"id":"https://openalex.org/S4210179765","display_name":"Studies in health technology and informatics","issn_l":"0926-9630","issn":["0926-9630","1879-8365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in Health Technology and Informatics","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4205656166.pdf","grobid_xml":"https://content.openalex.org/works/W4205656166.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Processing":[0],"unstructured":[1,57],"clinical":[2,18,51,61],"texts":[3],"is":[4,116],"often":[5],"necessary":[6],"to":[7,17,29,109],"support":[8,128],"certain":[9],"tasks":[10],"in":[11,46,88],"biomedicine,":[12],"such":[13],"as":[14],"matching":[15],"patients":[16],"trials.":[19,113],"Among":[20],"other":[21,124],"methods,":[22],"domain-specific":[23],"language":[24,125],"models":[25,45,85,126],"have":[26],"been":[27],"built":[28],"utilize":[30],"free-text":[31],"information.":[32],"This":[33,114],"study":[34],"evaluated":[35],"the":[36,48,65,72,77,92,99],"performance":[37],"of":[38,60],"Bidirectional":[39],"Encoder":[40],"Representations":[41],"from":[42],"Transformers":[43],"(BERT)":[44],"assessing":[47],"similarity":[49,107],"between":[50],"trial":[52],"texts.":[53],"We":[54],"compared":[55],"an":[56],"aggregated":[58],"summary":[59],"trials":[62],"reviewed":[63],"at":[64],"Johns":[66],"Hopkins":[67],"Molecular":[68],"Tumor":[69],"Board":[70],"with":[71],"ClinicalTrials.gov":[73],"records,":[74],"focusing":[75],"on":[76],"titles":[78],"and":[79,118],"eligibility":[80],"criteria.":[81],"Seven":[82],"pretrained":[83],"BERT-Based":[84],"were":[86],"used":[87],"our":[89],"analysis.":[90],"Of":[91],"six":[93],"biomedical-domain-specific":[94],"models,":[95],"only":[96],"SciBERT":[97],"outperformed":[98],"original":[100],"BERT":[101,121],"model":[102],"by":[103],"accurately":[104],"assigning":[105],"higher":[106],"scores":[108],"matched":[110],"than":[111],"mismatched":[112],"finding":[115],"promising":[117],"shows":[119],"that":[120],"and,":[122],"likely,":[123],"may":[127],"patient-trial":[129],"matching.":[130]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2022-01-25T00:00:00"}
