{"id":"https://openalex.org/W3119940851","doi":"https://doi.org/10.1145/3428757.3429106","title":"Siamese Neural Network for Unstructured Data Linkage","display_name":"Siamese Neural Network for Unstructured Data Linkage","publication_year":2020,"publication_date":"2020-11-30","ids":{"openalex":"https://openalex.org/W3119940851","doi":"https://doi.org/10.1145/3428757.3429106","mag":"3119940851"},"language":"en","primary_location":{"id":"doi:10.1145/3428757.3429106","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3428757.3429106","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd International Conference on Information Integration and Web-based Applications &amp; Services","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103002181","display_name":"Anna Jurek-Loughrey","orcid":"https://orcid.org/0000-0003-1002-5079"},"institutions":[{"id":"https://openalex.org/I126231945","display_name":"Queen's University Belfast","ror":"https://ror.org/00hswnk62","country_code":"GB","type":"education","lineage":["https://openalex.org/I126231945"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Anna Jurek-Loughrey","raw_affiliation_strings":["Queen's University, EEECS, Belfast, UK"],"affiliations":[{"raw_affiliation_string":"Queen's University, EEECS, Belfast, UK","institution_ids":["https://openalex.org/I126231945"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5103002181"],"corresponding_institution_ids":["https://openalex.org/I126231945"],"apc_list":null,"apc_paid":null,"fwci":0.1856,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.60831472,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"417","last_page":"425"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9721999764442444,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9628000259399414,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7822880148887634},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record linkage","score":0.7655820846557617},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6233705878257751},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5764846801757812},{"id":"https://openalex.org/keywords/linkage","display_name":"Linkage (software)","score":0.5582035183906555},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.5537470579147339},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.5476731657981873},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.5382490158081055},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5117254257202148},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.49818873405456543},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4978172779083252},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.49654704332351685},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4874598979949951},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4779684245586395},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.47440004348754883},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4102301299571991},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.29564380645751953},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.06790924072265625}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7822880148887634},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.7655820846557617},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6233705878257751},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5764846801757812},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.5582035183906555},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.5537470579147339},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.5476731657981873},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.5382490158081055},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5117254257202148},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.49818873405456543},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4978172779083252},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.49654704332351685},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4874598979949951},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4779684245586395},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.47440004348754883},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4102301299571991},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29564380645751953},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.06790924072265625},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3428757.3429106","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3428757.3429106","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd International Conference on Information Integration and Web-based Applications &amp; Services","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.qub.ac.uk/portal:publications/96d266b0-230b-4dbd-9087-36a28b02112b","is_oa":false,"landing_page_url":"https://pure.qub.ac.uk/en/publications/96d266b0-230b-4dbd-9087-36a28b02112b","pdf_url":null,"source":{"id":"https://openalex.org/S4306402319","display_name":"Research Portal (Queen's University Belfast)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I126231945","host_organization_name":"Queen's University Belfast","host_organization_lineage":["https://openalex.org/I126231945"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Jurek-Loughrey , A 2021 , Siamese Neural Network for unstructured data linkage . in M Indrawan-Santiago , E Pardede , I L Salvadori , M Steinbauer , I Khalil &amp; G Kotsis (eds) , Proceedings of the 22nd International Conference on Information Integration and Web-based Applications and Services (iiWAS2020) . Association for Computing Machinery , pp. 417-425 , 22nd International Conference on Information Integration and Web-based Applications and Services (iiWAS 2020) , virtual, online , 30/11/2020 . https://doi.org/10.1145/3428757.3429106","raw_type":"contributionToPeriodical"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2487032012","https://openalex.org/W2211355040","https://openalex.org/W2808916796","https://openalex.org/W2989796854","https://openalex.org/W2176311362","https://openalex.org/W3012491082","https://openalex.org/W2911599090","https://openalex.org/W2548183822","https://openalex.org/W4294243532","https://openalex.org/W2141965543"],"abstract_inverted_index":{"Data":[0,13],"integration":[1,20],"is":[2,21,34,41],"one":[3],"of":[4,11,18,24,64,82,87,92,113],"the":[5,9,22,27,53,65,79,93,143,156],"key":[6,16],"problems":[7],"in":[8],"era":[10],"Big":[12],"analytics.":[14],"The":[15,117],"challenge":[17],"data":[19,45,94,138],"identification":[23],"records":[25,54,70,127],"representing":[26],"same":[28,80],"entities":[29],"(e.g.":[30,84,98],"person).":[31],"This":[32],"task":[33],"referred":[35],"to":[36,47,107],"as":[37],"Record":[38,108],"Linkage.":[39],"It":[40],"uncommon":[42],"for":[43],"different":[44,72,137],"sources":[46,73],"share":[48],"a":[49,104,133],"unique":[50],"identifier":[51],"hence":[52],"must":[55],"be":[56,120],"matched":[57],"by":[58,78],"comparing":[59],"their":[60],"corresponding":[61],"values.":[62],"Most":[63],"existing":[66],"methods":[67],"assume":[68,132],"that":[69,142],"across":[71,136],"are":[74],"structured":[75],"and":[76,125,128],"represented":[77],"set":[81],"attributes":[83],"name,":[85],"date":[86],"birth).":[88],"However,":[89],"nowadays":[90],"majority":[91],"comes":[95],"without":[96],"structure":[97],"social":[99],"media":[100],"sites).":[101],"We":[102,140],"propose":[103],"new":[105],"approach":[106],"Linkage":[109],"based":[110],"on":[111,146],"application":[112],"Siamese":[114],"Neural":[115],"Network.":[116],"model":[118,144],"can":[119],"applied":[121],"with":[122,148],"structured,":[123],"semi-structured":[124],"unstructured":[126],"it":[129],"does":[130],"not":[131],"common":[134],"format":[135],"sources.":[139],"demonstrate":[141],"performs":[145],"par":[147],"other":[149],"approaches,":[150],"which":[151],"make":[152],"constraining":[153],"assumptions":[154],"regarding":[155],"data.":[157]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
