{"id":"https://openalex.org/W7083306477","doi":"https://doi.org/10.48550/arxiv.2509.19327","title":"A systematic review of trial-matching pipelines using large language models","display_name":"A systematic review of trial-matching pipelines using large language models","publication_year":2025,"publication_date":"2025-09-13","ids":{"openalex":"https://openalex.org/W7083306477","doi":"https://doi.org/10.48550/arxiv.2509.19327"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2509.19327","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.19327","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2509.19327","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Morrison, Braxton A.","orcid":null},"institutions":[{"id":"https://openalex.org/I180670191","display_name":"University of California, San Francisco","ror":"https://ror.org/043mz5j54","country_code":"US","type":"education","lineage":["https://openalex.org/I180670191"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Morrison, Braxton A.","raw_affiliation_strings":["University of California, San Francisco"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, San Francisco","institution_ids":["https://openalex.org/I180670191"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Sushil, Madhumita","orcid":null},"institutions":[{"id":"https://openalex.org/I180670191","display_name":"University of California, San Francisco","ror":"https://ror.org/043mz5j54","country_code":"US","type":"education","lineage":["https://openalex.org/I180670191"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sushil, Madhumita","raw_affiliation_strings":["University of California, San Francisco"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, San Francisco","institution_ids":["https://openalex.org/I180670191"]}]},{"author_position":"last","author":{"id":null,"display_name":"Young, Jacob S.","orcid":null},"institutions":[{"id":"https://openalex.org/I180670191","display_name":"University of California, San Francisco","ror":"https://ror.org/043mz5j54","country_code":"US","type":"education","lineage":["https://openalex.org/I180670191"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Young, Jacob S.","raw_affiliation_strings":["University of California, San Francisco"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California, San Francisco","institution_ids":["https://openalex.org/I180670191"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I180670191"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T14400","display_name":"Medical Coding and Health Information","score":0.11389999836683273,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T14400","display_name":"Medical Coding and Health Information","score":0.11389999836683273,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10556","display_name":"Global Cancer Incidence and Screening","score":0.09709999710321426,"subfield":{"id":"https://openalex.org/subfields/2730","display_name":"Oncology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12103","display_name":"Cancer Risks and Factors","score":0.08179999887943268,"subfield":{"id":"https://openalex.org/subfields/2730","display_name":"Oncology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6330000162124634},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.531000018119812},{"id":"https://openalex.org/keywords/systematic-review","display_name":"Systematic review","score":0.3718000054359436},{"id":"https://openalex.org/keywords/pipeline-transport","display_name":"Pipeline transport","score":0.3447999954223633},{"id":"https://openalex.org/keywords/clinical-trial","display_name":"Clinical trial","score":0.32409998774528503},{"id":"https://openalex.org/keywords/predictive-modelling","display_name":"Predictive modelling","score":0.31779998540878296}],"concepts":[{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6330000162124634},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6244999766349792},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.531000018119812},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5209000110626221},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3880999982357025},{"id":"https://openalex.org/C189708586","wikidata":"https://www.wikidata.org/wiki/Q1504425","display_name":"Systematic review","level":3,"score":0.3718000054359436},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.3447999954223633},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.33889999985694885},{"id":"https://openalex.org/C535046627","wikidata":"https://www.wikidata.org/wiki/Q30612","display_name":"Clinical trial","level":2,"score":0.32409998774528503},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C109359841","wikidata":"https://www.wikidata.org/wiki/Q728944","display_name":"Inclusion (mineral)","level":2,"score":0.3122999966144562},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.2953999936580658},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.28929999470710754},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27469998598098755},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26669999957084656},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.25380000472068787}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2509.19327","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.19327","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2509.19327","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.19327","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.6319798827171326}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Matching":[0],"patients":[1],"to":[2,23,59,203,221],"clinical":[3,60,204],"trial":[4,61,205],"options":[5],"is":[6,18,170],"critical":[7,227],"for":[8,159,228],"identifying":[9,56],"novel":[10],"treatments,":[11],"especially":[12],"in":[13,107,130,200],"oncology.":[14],"However,":[15],"manual":[16],"matching":[17,75,131],"labor-intensive":[19],"and":[20,46,52,109,132,154,181,194,210,219,223],"error-prone,":[21],"leading":[22],"recruitment":[24],"delays.":[25],"Pipelines":[26],"incorporating":[27],"large":[28,165,177],"language":[29],"models":[30,158,166],"(LLMs)":[31],"offer":[32],"a":[33,38],"promising":[34,208],"solution.":[35],"We":[36],"conducted":[37],"systematic":[39],"review":[40,197],"of":[41,164,190],"studies":[42,72,116],"published":[43],"between":[44],"2020":[45],"2025":[47],"from":[48],"three":[49],"academic":[50],"databases":[51],"one":[53,103],"preprint":[54],"server,":[55],"LLM-based":[57],"approaches":[58],"matching.":[62],"Of":[63],"126":[64],"unique":[65],"articles,":[66],"31":[67],"met":[68],"inclusion":[69],"criteria.":[70],"Reviewed":[71],"focused":[73],"on":[74],"patient-to-criterion":[76],"only":[77,80,83,88],"(n=4),":[78],"patient-to-trial":[79],"(n=10),":[81],"trial-to-patient":[82],"(n=2),":[84],"binary":[85],"eligibility":[86,133],"classification":[87],"(n=1)":[89],"or":[90],"combined":[91],"tasks":[92],"(n=14).":[93],"Sixteen":[94],"used":[95,99,104],"synthetic":[96],"data;":[97,102],"fourteen":[98],"real":[100],"patient":[101],"both.":[105],"Variability":[106],"datasets":[108],"evaluation":[110],"metrics":[111],"limited":[112],"cross-study":[113],"comparability.":[114],"In":[115],"with":[117,144],"direct":[118],"comparisons,":[119],"the":[120,148],"GPT-4":[121],"model":[122],"consistently":[123],"outperformed":[124],"other":[125],"models,":[126],"even":[127],"finely-tuned":[128],"ones,":[129],"extraction,":[134],"albeit":[135],"at":[136],"higher":[137],"cost.":[138],"Promising":[139],"strategies":[140],"included":[141],"zero-shot":[142],"prompting":[143],"proprietary":[145],"LLMs":[146,202],"like":[147],"GPT-4o":[149],"model,":[150],"advanced":[151],"retrieval":[152],"methods,":[153],"fine-tuning":[155],"smaller,":[156],"open-source":[157],"data":[160,179,192],"privacy":[161],"when":[162],"incorporation":[163],"into":[167],"hospital":[168],"infrastructure":[169],"infeasible.":[171],"Key":[172],"challenges":[173,183],"include":[174],"accessing":[175],"sufficiently":[176],"real-world":[178],"sets,":[180,218],"deployment-associated":[182],"such":[184],"as":[185],"reducing":[186],"cost,":[187],"mitigating":[188],"risk":[189],"hallucinations,":[191],"leakage,":[193],"bias.":[195],"This":[196],"synthesizes":[198],"progress":[199],"applying":[201],"matching,":[206],"highlighting":[207],"directions":[209],"key":[211],"limitations.":[212],"Standardized":[213],"metrics,":[214],"more":[215],"realistic":[216],"test":[217],"attention":[220],"cost-efficiency":[222],"fairness":[224],"will":[225],"be":[226],"broader":[229],"deployment.":[230]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
