{"id":"https://openalex.org/W7118297802","doi":"https://doi.org/10.48550/arxiv.2601.00787","title":"Adapting Natural Language Processing Models Across Jurisdictions: A pilot Study in Canadian Cancer Registries","display_name":"Adapting Natural Language Processing Models Across Jurisdictions: A pilot Study in Canadian Cancer Registries","publication_year":2026,"publication_date":"2026-01-02","ids":{"openalex":"https://openalex.org/W7118297802","doi":"https://doi.org/10.48550/arxiv.2601.00787"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.00787","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00787","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.00787","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004478607","display_name":"Jonathan Simkin","orcid":"https://orcid.org/0000-0001-6184-6024"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Simkin, Jonathan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004441174","display_name":"Lovedeep Gondara","orcid":"https://orcid.org/0000-0003-0287-9672"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gondara, Lovedeep","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122240501","display_name":"Zeeshan Rizvi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rizvi, Zeeshan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028609896","display_name":"Gerry Doyle","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Doyle, Gregory","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122280161","display_name":"Jeff Dowden","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dowden, Jeff","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122172007","display_name":"Dan Bond","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bond, Dan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122039357","display_name":"Desmond Martin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Martin, Desmond","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122162249","display_name":"Raymond Ng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ng, Raymond","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5004478607"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.43880000710487366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.43880000710487366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.15719999372959137,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.08110000193119049,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5483999848365784},{"id":"https://openalex.org/keywords/interoperability","display_name":"Interoperability","score":0.5410000085830688},{"id":"https://openalex.org/keywords/cancer-registry","display_name":"Cancer registry","score":0.5232999920845032},{"id":"https://openalex.org/keywords/cancer","display_name":"Cancer","score":0.5048999786376953},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.3937999904155731},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.3352000117301941},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.29159998893737793}],"concepts":[{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5483999848365784},{"id":"https://openalex.org/C20136886","wikidata":"https://www.wikidata.org/wiki/Q749647","display_name":"Interoperability","level":2,"score":0.5410000085830688},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5383999943733215},{"id":"https://openalex.org/C2778527826","wikidata":"https://www.wikidata.org/wiki/Q1787017","display_name":"Cancer registry","level":3,"score":0.5232999920845032},{"id":"https://openalex.org/C121608353","wikidata":"https://www.wikidata.org/wiki/Q12078","display_name":"Cancer","level":2,"score":0.5048999786376953},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.474700003862381},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47189998626708984},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45500001311302185},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41519999504089355},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.3937999904155731},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.3352000117301941},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32019999623298645},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.29159998893737793},{"id":"https://openalex.org/C2779714256","wikidata":"https://www.wikidata.org/wiki/Q25305062","display_name":"Multiple Models","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C119898033","wikidata":"https://www.wikidata.org/wiki/Q3433888","display_name":"Ensemble forecasting","level":2,"score":0.28189998865127563},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2757999897003174},{"id":"https://openalex.org/C45827449","wikidata":"https://www.wikidata.org/wiki/Q5270338","display_name":"Diagnosis code","level":3,"score":0.2669000029563904},{"id":"https://openalex.org/C206497026","wikidata":"https://www.wikidata.org/wiki/Q1753883","display_name":"SNOMED CT","level":3,"score":0.26420000195503235},{"id":"https://openalex.org/C160016529","wikidata":"https://www.wikidata.org/wiki/Q2484317","display_name":"Tier 2 network","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.2515000104904175},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.00787","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00787","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.00787","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00787","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.4358609616756439,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Population-based":[0],"cancer":[1,22,72,252],"registries":[2],"depend":[3],"on":[4],"pathology":[5,86,253],"reports":[6,87],"as":[7],"their":[8,32],"primary":[9],"diagnostic":[10],"source,":[11],"yet":[12],"manual":[13],"abstraction":[14],"is":[15],"resource-intensive":[16],"and":[17,83,102,117,167,176,190,200,220,245,254],"contributes":[18],"to":[19,34,143,171,195],"delays":[20],"in":[21,74,137,224,232],"data.":[23],"While":[24],"transformer-based":[25],"NLP":[26,243],"systems":[27],"have":[28],"improved":[29],"registry":[30,255],"workflows,":[31],"ability":[33],"generalize":[35],"across":[36],"jurisdictions":[37],"with":[38,145,174,198],"differing":[39],"reporting":[40],"conventions":[41],"remains":[42],"poorly":[43],"understood.":[44],"We":[45,227],"present":[46],"the":[47,60,89,128,153,179,185,203],"first":[48],"cross-provincial":[49],"evaluation":[50],"of":[51,80,165],"adapting":[52],"BCCRTron,":[53],"a":[54,67,157,161,229,246],"domain-adapted":[55],"transformer":[56,69],"model":[57,235,250],"developed":[58],"at":[59],"British":[61],"Columbia":[62],"Cancer":[63,93],"Registry,":[64],"alongside":[65],"GatorTron,":[66],"biomedical":[68],"model,":[70],"for":[71,96,178,202,251],"surveillance":[73],"Canada.":[75],"Our":[76],"training":[77],"dataset":[78],"consisted":[79],"approximately":[81],"104,000":[82],"22,000":[84],"de-identified":[85],"from":[88],"Newfoundland":[90],"&amp;":[91],"Labrador":[92],"Registry":[94],"(NLCR)":[95],"Tier":[97,103,162,183],"1":[98,163],"(cancer":[99],"vs.":[100,106],"non-cancer)":[101],"2":[104],"(reportable":[105],"non-reportable)":[107],"tasks,":[108],"respectively.":[109],"Both":[110],"models":[111,130,155],"were":[112],"fine-tuned":[113],"using":[114,156],"complementary":[115,213],"synoptic":[116],"diagnosis":[118],"focused":[119],"report":[120],"section":[121],"input":[122],"pipelines.":[123],"Across":[124],"NLCR":[125],"test":[126],"sets,":[127],"adapted":[129],"maintained":[131],"high":[132],"performance,":[133],"demonstrating":[134],"transformers":[135],"pretrained":[136],"one":[138],"jurisdiction":[139],"can":[140],"be":[141],"localized":[142],"another":[144],"modest":[146],"fine-tuning.":[147],"To":[148],"improve":[149,221],"sensitivity,":[150],"we":[151],"combined":[152],"two":[154],"conservative":[158],"OR-ensemble":[159],"achieving":[160],"recall":[164,189],"0.99":[166,188],"reduced":[168,191],"missed":[169,192,218],"cancers":[170,194,219],"24,":[172],"compared":[173,197],"48":[175],"54":[177,199],"standalone":[180],"models.":[181,205],"For":[182],"2,":[184],"ensemble":[186,211],"achieved":[187],"reportable":[193],"33,":[196],"46":[201],"individual":[204],"These":[206],"findings":[207],"demonstrate":[208],"that":[209],"an":[210],"combining":[212],"text":[214],"representations":[215],"substantially":[216],"reduce":[217],"error":[222],"coverage":[223],"cancer-registry":[225],"NLP.":[226],"implement":[228],"privacy-preserving":[230],"workflow":[231],"which":[233],"only":[234],"weights":[236],"are":[237],"shared":[238],"between":[239],"provinces,":[240],"supporting":[241],"interoperable":[242],"infrastructure":[244],"future":[247],"pan-Canadian":[248],"foundation":[249],"workflows.":[256]},"counts_by_year":[],"updated_date":"2026-01-08T20:10:11.968330","created_date":"2026-01-08T00:00:00"}
