{"id":"https://openalex.org/W3119752913","doi":"https://doi.org/10.1145/3431816","title":"Deep Entity Matching","display_name":"Deep Entity Matching","publication_year":2021,"publication_date":"2021-01-06","ids":{"openalex":"https://openalex.org/W3119752913","doi":"https://doi.org/10.1145/3431816","mag":"3119752913"},"language":"en","primary_location":{"id":"doi:10.1145/3431816","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3431816","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100750716","display_name":"Yuliang Li","orcid":"https://orcid.org/0000-0002-0602-149X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuliang Li","raw_affiliation_strings":["Megagon Labs, Mountain View, CA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Megagon Labs, Mountain View, CA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100428948","display_name":"Jinfeng Li","orcid":"https://orcid.org/0000-0001-9462-2625"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jinfeng Li","raw_affiliation_strings":["Megagon Labs, Mountain View, CA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Megagon Labs, Mountain View, CA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072690374","display_name":"Yoshihiko Suhara","orcid":"https://orcid.org/0000-0001-7554-2865"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yoshihiko Suhara","raw_affiliation_strings":["Megagon Labs, Mountain View, CA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Megagon Labs, Mountain View, CA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113414416","display_name":"Jin Wang","orcid":"https://orcid.org/0000-0002-8298-4378"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin Wang","raw_affiliation_strings":["Megagon Labs, Mountain View, CA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Megagon Labs, Mountain View, CA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000108670","display_name":"Wataru Hirota","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wataru Hirota","raw_affiliation_strings":["Megagon Labs, Mountain View, CA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Megagon Labs, Mountain View, CA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026007789","display_name":"Wang-Chiew Tan","orcid":"https://orcid.org/0009-0008-4174-7545"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang-Chiew Tan","raw_affiliation_strings":["Megagon Labs, Mountain View, CA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Megagon Labs, Mountain View, CA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.8113,"has_fulltext":false,"cited_by_count":52,"citation_normalized_percentile":{"value":0.97074645,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"13","issue":"1","first_page":"1","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9089000225067139,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8710655570030212},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.7775533199310303},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6421631574630737},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.5455715656280518},{"id":"https://openalex.org/keywords/entity-linking","display_name":"Entity linking","score":0.5048838257789612},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4995453357696533},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4784691035747528},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.42506909370422363},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3782332241535187},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.32377272844314575},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.22181028127670288},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.083767831325531}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8710655570030212},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.7775533199310303},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6421631574630737},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.5455715656280518},{"id":"https://openalex.org/C96711827","wikidata":"https://www.wikidata.org/wiki/Q17012245","display_name":"Entity linking","level":3,"score":0.5048838257789612},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4995453357696533},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4784691035747528},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.42506909370422363},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3782332241535187},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32377272844314575},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.22181028127670288},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.083767831325531},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3431816","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3431816","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.5199999809265137}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W1525595230","https://openalex.org/W1731081199","https://openalex.org/W1750205245","https://openalex.org/W1843891098","https://openalex.org/W1964189668","https://openalex.org/W1981590391","https://openalex.org/W1995099886","https://openalex.org/W2007682403","https://openalex.org/W2031250218","https://openalex.org/W2059299892","https://openalex.org/W2061873838","https://openalex.org/W2073471108","https://openalex.org/W2104511295","https://openalex.org/W2114764731","https://openalex.org/W2119465010","https://openalex.org/W2129607105","https://openalex.org/W2134584261","https://openalex.org/W2152184085","https://openalex.org/W2164501930","https://openalex.org/W2165698076","https://openalex.org/W2167847032","https://openalex.org/W2479722630","https://openalex.org/W2493916176","https://openalex.org/W2508865106","https://openalex.org/W2516809705","https://openalex.org/W2542998387","https://openalex.org/W2798435682","https://openalex.org/W2798649495","https://openalex.org/W2804927761","https://openalex.org/W2897405591","https://openalex.org/W2898474096","https://openalex.org/W2933781133","https://openalex.org/W2945883855","https://openalex.org/W2946504770","https://openalex.org/W2948130259","https://openalex.org/W2949780682","https://openalex.org/W2950133940","https://openalex.org/W2951147191","https://openalex.org/W2951438725","https://openalex.org/W2957204582","https://openalex.org/W2957285709","https://openalex.org/W2958815962","https://openalex.org/W2962739339","https://openalex.org/W2962772482","https://openalex.org/W2966720878","https://openalex.org/W2970641574","https://openalex.org/W2970726176","https://openalex.org/W2970875249","https://openalex.org/W2971681342","https://openalex.org/W2978017171","https://openalex.org/W2984651502","https://openalex.org/W2985009327","https://openalex.org/W2999615587","https://openalex.org/W3010144884","https://openalex.org/W3011807731","https://openalex.org/W3033733989","https://openalex.org/W3034997167","https://openalex.org/W3086663505","https://openalex.org/W3097013020","https://openalex.org/W3101366597","https://openalex.org/W3103177583","https://openalex.org/W3105771849","https://openalex.org/W3106020963","https://openalex.org/W4297969478","https://openalex.org/W4300517451","https://openalex.org/W6637618735"],"related_works":["https://openalex.org/W4241523039","https://openalex.org/W2360028903","https://openalex.org/W4280543773","https://openalex.org/W178231042","https://openalex.org/W2366083136","https://openalex.org/W2392835431","https://openalex.org/W2387622493","https://openalex.org/W1932132538","https://openalex.org/W2357832196","https://openalex.org/W1566252468"],"abstract_inverted_index":{"Entity":[0],"matching":[1,83,112,136,155,185,192,204],"refers":[2,49],"to":[3,13,20,38,50,82,85,160,163,187,196,202],"the":[4,14,36,51,78,134,147,158,180,189],"task":[5],"of":[6,54,60,74,80,108,149,172,182],"determining":[7,55,77],"whether":[8,56],"two":[9,57],"different":[10,32,61],"representations":[11,59],"refer":[12],"same":[15],"real-world":[16],"entity.":[17],"It":[18],"continues":[19],"be":[21,39,64],"a":[22,109],"prevalent":[23],"problem":[24,53,68],"for":[25,132],"many":[26],"organizations":[27],"where":[28],"data":[29,167],"resides":[30],"in":[31,124],"sources":[33],"and":[34,41,128,153,179],"duplicates":[35],"need":[37,159,201],"identified":[40],"managed.":[42],"The":[43],"term":[44],"\u201centity":[45],"matching\u201d":[46],"also":[47],"loosely":[48],"broader":[52,190],"heterogeneous":[58],"entities":[62],"should":[63],"associated":[65],"together.":[66],"This":[67],"has":[69,89],"an":[70,106,197],"even":[71,198],"wider":[72],"scope":[73],"applications,":[75],"from":[76],"subsidiaries":[79],"companies":[81],"jobs":[84],"job":[86],"seekers,":[87],"which":[88,104,194],"impactful":[90],"consequences.":[91],"In":[92],"this":[93],"article,":[94],"we":[95,120,139],"first":[96],"report":[97],"our":[98],"recent":[99,122],"system":[100,113],"D":[101],"ITTO":[102],",":[103],"is":[105],"example":[107],"modern":[110],"entity":[111,135,144,154,184,191],"based":[114],"on":[115],"pretrained":[116],"language":[117,130,177],"models.":[118],"Then":[119],"summarize":[121],"solutions":[123,186],"applying":[125],"deep":[126,173],"learning":[127,174],"pre-trained":[129,176],"models":[131],"solving":[133],"task.":[137],"Finally,":[138],"discuss":[140],"research":[141],"directions":[142],"beyond":[143],"matching,":[145],"including":[146],"promise":[148],"synergistically":[150],"integrating":[151],"blocking":[152],"steps":[156],"together,":[157],"examine":[161],"methods":[162],"alleviate":[164],"steep":[165],"training":[166],"requirements":[168],"that":[169],"are":[170],"typical":[171],"or":[175],"models,":[178],"importance":[181],"generalizing":[183],"handle":[188],"problem,":[193],"leads":[195],"more":[199],"pressing":[200],"explain":[203],"outcomes.":[205]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":7}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
