{"id":"https://openalex.org/W2516085159","doi":"https://doi.org/10.1109/tmm.2016.2598482","title":"Classification-Based Record Linkage With Pseudonymized Data for Epidemiological Cancer Registries","display_name":"Classification-Based Record Linkage With Pseudonymized Data for Epidemiological Cancer Registries","publication_year":2016,"publication_date":"2016-08-08","ids":{"openalex":"https://openalex.org/W2516085159","doi":"https://doi.org/10.1109/tmm.2016.2598482","mag":"2516085159"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2016.2598482","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2016.2598482","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080854549","display_name":"Yannik Siegert","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yannik Siegert","raw_affiliation_strings":["Epidemiological Cancer Registry of North Rhine-Westphalia, M\u00fcnster, Germany"],"affiliations":[{"raw_affiliation_string":"Epidemiological Cancer Registry of North Rhine-Westphalia, M\u00fcnster, Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022183918","display_name":"Xiaoyi Jiang","orcid":"https://orcid.org/0000-0001-7678-9528"},"institutions":[{"id":"https://openalex.org/I22465464","display_name":"University of M\u00fcnster","ror":"https://ror.org/00pd74e08","country_code":"DE","type":"education","lineage":["https://openalex.org/I22465464"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Xiaoyi Jiang","raw_affiliation_strings":["Department of Mathematics and Computer Science, University of M\u00fcunster, M\u00fcnster, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Computer Science, University of M\u00fcunster, M\u00fcnster, Germany","institution_ids":["https://openalex.org/I22465464"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079243673","display_name":"Volker Krieg","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Volker Krieg","raw_affiliation_strings":["Epidemiological Cancer Registry of North Rhine-Westphalia, M\u00fcnster, Germany"],"affiliations":[{"raw_affiliation_string":"Epidemiological Cancer Registry of North Rhine-Westphalia, M\u00fcnster, Germany","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020869104","display_name":"Sebastian Bartholom\u00e4us","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sebastian Bartholomaus","raw_affiliation_strings":["Epidemiological Cancer Registry of North Rhine-Westphalia, M\u00fcnster, Germany"],"affiliations":[{"raw_affiliation_string":"Epidemiological Cancer Registry of North Rhine-Westphalia, M\u00fcnster, Germany","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5080854549"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7953,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.87231507,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"18","issue":"10","first_page":"1929","last_page":"1941"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9758999943733215,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11396","display_name":"Artificial Intelligence in Healthcare","score":0.9624999761581421,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8307560086250305},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record linkage","score":0.6637297868728638},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5637520551681519},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5372194647789001},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4742273986339569},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47149938344955444},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.4641614556312561},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.4363614320755005},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.42626678943634033},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.4163201153278351},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.10989663004875183},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.09270957112312317}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8307560086250305},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.6637297868728638},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5637520551681519},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5372194647789001},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4742273986339569},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47149938344955444},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.4641614556312561},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.4363614320755005},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.42626678943634033},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.4163201153278351},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.10989663004875183},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.09270957112312317},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C99454951","wikidata":"https://www.wikidata.org/wiki/Q932068","display_name":"Environmental health","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2016.2598482","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2016.2598482","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Good health and well-being","id":"https://metadata.un.org/sdg/3","score":0.6899999976158142}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W157213038","https://openalex.org/W1586468107","https://openalex.org/W1618905105","https://openalex.org/W1964832275","https://openalex.org/W1970026646","https://openalex.org/W1982211005","https://openalex.org/W2003452793","https://openalex.org/W2021292601","https://openalex.org/W2029128277","https://openalex.org/W2035615211","https://openalex.org/W2052390074","https://openalex.org/W2053091309","https://openalex.org/W2058751152","https://openalex.org/W2073471108","https://openalex.org/W2103863920","https://openalex.org/W2124868070","https://openalex.org/W2161600801","https://openalex.org/W2163756102","https://openalex.org/W2358843920","https://openalex.org/W2396219269","https://openalex.org/W2487087946","https://openalex.org/W4230502578","https://openalex.org/W4234053595","https://openalex.org/W6636501900"],"related_works":["https://openalex.org/W986318368","https://openalex.org/W2000785801","https://openalex.org/W2384410913","https://openalex.org/W2352878646","https://openalex.org/W2990194547","https://openalex.org/W2004734601","https://openalex.org/W2130149817","https://openalex.org/W1480123525","https://openalex.org/W2620865396","https://openalex.org/W4224922629"],"abstract_inverted_index":{"Cancer":[0,23],"is":[1,49,93,124,134],"one":[2],"of":[3,42,52,87,97,109,113,128,173,182,202,209,220,230,238,240,242],"the":[4,12,43,50,67,80,88,107,111,129,180,191,203,211,232],"widest":[5],"spread":[6],"diseases":[7],"in":[8,46,215],"human":[9,233],"society.":[10],"Therefore,":[11],"need":[13,112],"has":[14,82],"grown":[15],"to":[16,59,75,138,163,194,222],"monitor,":[17],"evaluate,":[18],"and":[19,39],"predict":[20],"its":[21],"development.":[22],"registries":[24,69],"address":[25],"this":[26,47],"problem":[27,108],"by":[28,126,150,235],"collecting":[29],"data":[30,53],"on":[31,170],"cancer":[32,62,68,176,184],"cases,":[33],"striving":[34],"for":[35],"high":[36],"quality,":[37],"accuracy,":[38],"completeness.":[40],"One":[41],"basic":[44],"challenges":[45],"context":[48],"linkage":[51,214],"from":[54,179,206],"multiple":[55],"sources.":[56],"In":[57,186],"order":[58,237],"link":[60],"new":[61],"records":[63,98,116,177],"with":[64,117,227],"existing":[65],"ones,":[66],"typically":[70],"use":[71],"an":[72,199,236],"algorithm":[73,81],"referred":[74],"as":[76],"record":[77,213],"linkage.":[78],"Although":[79],"automated":[83],"a":[84,94,151,171,183,207,228],"significant":[85],"amount":[86],"linking":[89],"process,":[90],"there":[91],"still":[92],"certain":[95],"percentage":[96],"that":[99,145],"cannot":[100],"be":[101,148,225],"linked":[102],"automatically.":[103],"This":[104],"study":[105],"addresses":[106],"reducing":[110,231],"manually":[114],"matching":[115],"machine":[118],"learning":[119],"methods.":[120],"The":[121,131],"particular":[122],"challenge":[123],"caused":[125],"pseudonymization":[127],"data.":[130],"main":[132],"contribution":[133],"thus":[135],"finding":[136],"ways":[137],"encode":[139],"the-pseudonymized-data,":[140],"i.e.,":[141],"feature":[142],"extraction":[143],"so":[144],"it":[146],"can":[147],"interpreted":[149],"classifier.":[152],"Three":[153],"classifiers":[154],"(neural":[155],"network,":[156],"support":[157],"vector":[158],"machines,":[159],"decision":[160],"tree)":[161],"manage":[162],"achieve":[164],"at":[165],"least":[166],"93%":[167],"classification":[168],"rate":[169],"dataset":[172],"73":[174],"000":[175],"extracted":[178],"inventory":[181],"registry.":[185],"addition,":[187],"ensemble":[188],"techniques":[189],"boost":[190],"performance":[192],"further":[193],"over":[195],"95%.":[196],"We":[197],"present":[198],"in-depth":[200],"discussion":[201],"experimental":[204],"results":[205],"perspective":[208],"applying":[210],"classification-based":[212],"real":[216],"practice.":[217],"Two":[218],"scenarios":[219],"translating":[221],"practice":[223],"will":[224],"outlined":[226],"potential":[229],"workload":[234],"magnitude":[239],"hundreds":[241],"hours.":[243]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
