{"id":"https://openalex.org/W2156317344","doi":"https://doi.org/10.1145/2018358.2018392","title":"Public record aggregation using semi-supervised entity resolution","display_name":"Public record aggregation using semi-supervised entity resolution","publication_year":2011,"publication_date":"2011-06-06","ids":{"openalex":"https://openalex.org/W2156317344","doi":"https://doi.org/10.1145/2018358.2018392","mag":"2156317344"},"language":"en","primary_location":{"id":"doi:10.1145/2018358.2018392","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2018358.2018392","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th International Conference on Artificial Intelligence and Law","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025701458","display_name":"Jack G. Conrad","orcid":"https://orcid.org/0000-0001-9114-9385"},"institutions":[{"id":"https://openalex.org/I68384125","display_name":"Thomson Reuters (United States)","ror":"https://ror.org/00m7gt169","country_code":"US","type":"company","lineage":["https://openalex.org/I68384125"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jack G. Conrad","raw_affiliation_strings":["Thomson Reuters Research and Development, Saint Paul, Minnesota"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Thomson Reuters Research and Development, Saint Paul, Minnesota","institution_ids":["https://openalex.org/I68384125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083256126","display_name":"Christopher Dozier","orcid":null},"institutions":[{"id":"https://openalex.org/I68384125","display_name":"Thomson Reuters (United States)","ror":"https://ror.org/00m7gt169","country_code":"US","type":"company","lineage":["https://openalex.org/I68384125"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Dozier","raw_affiliation_strings":["Thomson Reuters Research and Development, Saint Paul, Minnesota"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Thomson Reuters Research and Development, Saint Paul, Minnesota","institution_ids":["https://openalex.org/I68384125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014277446","display_name":"Hugo Molina-Salgado","orcid":null},"institutions":[{"id":"https://openalex.org/I68384125","display_name":"Thomson Reuters (United States)","ror":"https://ror.org/00m7gt169","country_code":"US","type":"company","lineage":["https://openalex.org/I68384125"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hugo Molina-Salgado","raw_affiliation_strings":["Thomson Reuters Research and Development, Saint Paul, Minnesota"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Thomson Reuters Research and Development, Saint Paul, Minnesota","institution_ids":["https://openalex.org/I68384125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014791741","display_name":"Merine Thomas","orcid":null},"institutions":[{"id":"https://openalex.org/I68384125","display_name":"Thomson Reuters (United States)","ror":"https://ror.org/00m7gt169","country_code":"US","type":"company","lineage":["https://openalex.org/I68384125"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Merine Thomas","raw_affiliation_strings":["Thomson Reuters Research and Development, Saint Paul, Minnesota"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Thomson Reuters Research and Development, Saint Paul, Minnesota","institution_ids":["https://openalex.org/I68384125"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024312309","display_name":"Sriharsha Veeramachaneni","orcid":null},"institutions":[{"id":"https://openalex.org/I68384125","display_name":"Thomson Reuters (United States)","ror":"https://ror.org/00m7gt169","country_code":"US","type":"company","lineage":["https://openalex.org/I68384125"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sriharsha Veeramachaneni","raw_affiliation_strings":["Thomson Reuters Research and Development, Saint Paul, Minnesota"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Thomson Reuters Research and Development, Saint Paul, Minnesota","institution_ids":["https://openalex.org/I68384125"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20355798,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"239","last_page":"248"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9373999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7670911550521851},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record linkage","score":0.7218983173370361},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6784781813621521},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5550795793533325},{"id":"https://openalex.org/keywords/resolution","display_name":"Resolution (logic)","score":0.4941308796405792},{"id":"https://openalex.org/keywords/public-records","display_name":"Public records","score":0.4618612229824066},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.43408453464508057},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4224125146865845},{"id":"https://openalex.org/keywords/linkage","display_name":"Linkage (software)","score":0.4159984886646271},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.33072131872177124},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3040011525154114},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2937193512916565},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.14673811197280884}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7670911550521851},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.7218983173370361},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6784781813621521},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5550795793533325},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.4941308796405792},{"id":"https://openalex.org/C512564126","wikidata":"https://www.wikidata.org/wiki/Q7257959","display_name":"Public records","level":2,"score":0.4618612229824066},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.43408453464508057},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4224125146865845},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.4159984886646271},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.33072131872177124},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3040011525154114},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2937193512916565},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.14673811197280884},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2018358.2018392","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2018358.2018392","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th International Conference on Artificial Intelligence and Law","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.884.3475","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.884.3475","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://www.researchgate.net/profile/Jack_Conrad2/publication/221538995_Public_record_aggregation_using_semi-supervised_entity_resolution/links/53ed47880cf26b9b7dc5bc7c.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.6499999761581421,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W46452414","https://openalex.org/W1582775886","https://openalex.org/W1646278814","https://openalex.org/W2003470504","https://openalex.org/W2023448865","https://openalex.org/W2036216970","https://openalex.org/W2055405704","https://openalex.org/W2123561513","https://openalex.org/W2154785834","https://openalex.org/W2171574281"],"related_works":["https://openalex.org/W2487032012","https://openalex.org/W2211355040","https://openalex.org/W2808916796","https://openalex.org/W2176311362","https://openalex.org/W3012491082","https://openalex.org/W3211905090","https://openalex.org/W1936317645","https://openalex.org/W4310568775","https://openalex.org/W2178148352","https://openalex.org/W2156317344"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"a":[3,33,43,54,77,105,129,136],"highly":[4],"scalable":[5],"state":[6],"of":[7,36,42,70,73,82,84,86,95,119,149],"the":[8,14,113,117,124,146],"art":[9],"record":[10,62,101,131,150],"aggregation":[11],"system":[12,67,115,127],"and":[13,30],"backbone":[15,48],"infrastructure":[16],"developed":[17],"to":[18,28,59,76,97,104,144],"support":[19,49,126],"it.":[20],"The":[21,47,65],"system,":[22,50],"called":[23,51,141],"PeopleMap,":[24],"allows":[25,57],"legal":[26],"professionals":[27],"effectively":[29],"efficiently":[31],"explore":[32],"broad":[34],"spectrum":[35],"public":[37,74,100],"records":[38,75],"databases":[39],"by":[40],"way":[41],"single":[44],"person-centric":[45],"search.":[46],"Concord,":[52],"is":[53,68,116,128],"toolkit":[55],"that":[56,134],"developers":[58],"economically":[60],"create":[61],"resolution":[63,151],"solutions.":[64,152],"PeopleMap":[66,114],"capable":[69],"linking":[71],"billions":[72],"master":[78],"data":[79,102],"set":[80],"consisting":[81],"hundreds":[83],"millions":[85],"person":[87,107],"records.":[88],"It":[89],"was":[90],"constructed":[91],"using":[92],"successive":[93],"applications":[94],"Concord":[96,125],"link":[98],"disparate":[99],"sets":[103],"central":[106],"authority":[108],"file.":[109],"To":[110],"our":[111],"knowledge,":[112],"largest":[118],"its":[120],"kind.":[121],"In":[122],"contrast,":[123],"novel":[130],"linkage":[132],"tool":[133],"uses":[135],"new":[137],"semi-supervised":[138],"training":[139],"technique":[140],"`surrogate":[142],"learning'":[143],"enable":[145],"rapid":[147],"development":[148]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
