{"id":"https://openalex.org/W4380433214","doi":"https://doi.org/10.1145/3588938","title":"Unicorn: A Unified Multi-tasking Model for Supporting Matching Tasks in Data Integration","display_name":"Unicorn: A Unified Multi-tasking Model for Supporting Matching Tasks in Data Integration","publication_year":2023,"publication_date":"2023-05-26","ids":{"openalex":"https://openalex.org/W4380433214","doi":"https://doi.org/10.1145/3588938"},"language":"en","primary_location":{"id":"doi:10.1145/3588938","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3588938","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082583725","display_name":"Jianhong Tu","orcid":"https://orcid.org/0009-0001-1554-1614"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianhong Tu","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-1554-1614","affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100739546","display_name":"Ju Fan","orcid":"https://orcid.org/0000-0003-4729-9903"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ju Fan","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-4729-9903","affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101824160","display_name":"Nan Tang","orcid":"https://orcid.org/0000-0003-2832-0295"},"institutions":[{"id":"https://openalex.org/I4210138380","display_name":"Qatar Cardiovascular Research Center","ror":"https://ror.org/038vyt185","country_code":"QA","type":"healthcare","lineage":["https://openalex.org/I4210138380"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Nan Tang","raw_affiliation_strings":["QCRI, Doha, Qatar"],"raw_orcid":"https://orcid.org/0000-0003-2832-0295","affiliations":[{"raw_affiliation_string":"QCRI, Doha, Qatar","institution_ids":["https://openalex.org/I4210138380"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092150593","display_name":"Peng Wang","orcid":"https://orcid.org/0009-0009-7699-5490"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Wang","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-7699-5490","affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100451576","display_name":"Guoliang Li","orcid":"https://orcid.org/0000-0002-1398-0621"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoliang Li","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-1398-0621","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008721449","display_name":"Xiaoyong Du","orcid":"https://orcid.org/0000-0002-5757-9135"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyong Du","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-5757-9135","affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012140948","display_name":"Xiaofeng Jia","orcid":"https://orcid.org/0000-0003-3159-2785"},"institutions":[{"id":"https://openalex.org/I4210096250","display_name":"Beijing Institute of Big Data Research","ror":"https://ror.org/00s1sz824","country_code":"CN","type":"facility","lineage":["https://openalex.org/I20231570","https://openalex.org/I37796252","https://openalex.org/I4210096250"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofeng Jia","raw_affiliation_strings":["Beijing Big Data Centre, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3159-2785","affiliations":[{"raw_affiliation_string":"Beijing Big Data Centre, Beijing, China","institution_ids":["https://openalex.org/I4210096250"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039332794","display_name":"Song Gao","orcid":"https://orcid.org/0000-0003-3572-0326"},"institutions":[{"id":"https://openalex.org/I4210096250","display_name":"Beijing Institute of Big Data Research","ror":"https://ror.org/00s1sz824","country_code":"CN","type":"facility","lineage":["https://openalex.org/I20231570","https://openalex.org/I37796252","https://openalex.org/I4210096250"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Song Gao","raw_affiliation_strings":["Beijing Big Data Centre, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3572-0326","affiliations":[{"raw_affiliation_string":"Beijing Big Data Centre, Beijing, China","institution_ids":["https://openalex.org/I4210096250"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5082583725"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":6.9036,"has_fulltext":false,"cited_by_count":28,"citation_normalized_percentile":{"value":0.97025411,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"1","issue":"1","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8098317384719849},{"id":"https://openalex.org/keywords/unicorn","display_name":"Unicorn","score":0.7875385284423828},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4903583526611328},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46356260776519775},{"id":"https://openalex.org/keywords/schema-matching","display_name":"Schema matching","score":0.4553847312927246},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41823622584342957},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4177699089050293},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.38174545764923096},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33314889669418335},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.32380449771881104},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.3172455132007599}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8098317384719849},{"id":"https://openalex.org/C2780734062","wikidata":"https://www.wikidata.org/wiki/Q3549947","display_name":"Unicorn","level":2,"score":0.7875385284423828},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4903583526611328},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46356260776519775},{"id":"https://openalex.org/C2777327318","wikidata":"https://www.wikidata.org/wiki/Q1408390","display_name":"Schema matching","level":3,"score":0.4553847312927246},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41823622584342957},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4177699089050293},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.38174545764923096},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33314889669418335},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32380449771881104},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.3172455132007599},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3588938","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3588938","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1870305865","https://openalex.org/W2001496424","https://openalex.org/W2020694521","https://openalex.org/W2115214414","https://openalex.org/W2157060173","https://openalex.org/W2262562434","https://openalex.org/W2340222647","https://openalex.org/W2612526608","https://openalex.org/W2762307198","https://openalex.org/W2798649495","https://openalex.org/W2809290718","https://openalex.org/W3011807731","https://openalex.org/W3014705052","https://openalex.org/W3016473712","https://openalex.org/W3045211065","https://openalex.org/W3080189354","https://openalex.org/W3092962901","https://openalex.org/W3096174636","https://openalex.org/W3123375411","https://openalex.org/W3156850293","https://openalex.org/W3174181645","https://openalex.org/W3197468999","https://openalex.org/W4205922070","https://openalex.org/W4210595653","https://openalex.org/W4234053595","https://openalex.org/W4282010430","https://openalex.org/W4379390578","https://openalex.org/W4381326864","https://openalex.org/W4385572845","https://openalex.org/W4385573115","https://openalex.org/W6605348999"],"related_works":["https://openalex.org/W1128683088","https://openalex.org/W2372910313","https://openalex.org/W3138074544","https://openalex.org/W4310041472","https://openalex.org/W2938811602","https://openalex.org/W2036644834","https://openalex.org/W2408969024","https://openalex.org/W2016611314","https://openalex.org/W2243208152","https://openalex.org/W2080890385"],"abstract_inverted_index":{"Data":[0],"matching":[1,34,86,133,177,207,245],"-":[2,23],"which":[3,50,164],"decides":[4],"whether":[5,171],"two":[6],"data":[7,29,85,129,152,206],"elements":[8,130,153],"(e.g.,":[9],"string,":[10],"tuple,":[11],"column,":[12],"or":[13,46],"knowledge":[14,60,91],"graph":[15],"entity)":[16],"are":[17,51],"the":[18,57,140,189,227],"\"same\"":[19],"(a.k.a.":[20],"a":[21,25,78,118,157,162,166,172,184,193],"match)":[22],"is":[24,42,121,165],"key":[26],"concept":[27],"in":[28],"integration,":[30],"such":[31,117],"as":[32],"entity":[33],"and":[35,55,69,98,101,131,160,209,222,235],"schema":[36],"matching.":[37],"The":[38],"widely":[39],"used":[40],"practice":[41],"to":[43,53,124,169],"build":[44],"task-specific":[45],"even":[47],"dataset-specific":[48],"solutions,":[49],"hard":[52],"generalize":[54],"disable":[56],"opportunities":[58],"of":[59,127,135,151,179],"sharing":[61,92],"that":[62,147,187,211],"can":[63,89,102,215,240],"be":[64],"learned":[65,158,190],"from":[66,95],"different":[67],"datasets":[68,202,236],"multiple":[70,96,99,136,180],"tasks.":[71,87,137],"In":[72],"this":[73],"paper,":[74],"we":[75],"propose":[76],"Unicorn,":[77],"unified":[79,119,213],"model":[80,120,186,214],"for":[81,107,232],"generally":[82],"supporting":[83],"common":[84],"Unicorn":[88,142,182,239],"enable":[90],"by":[93],"learning":[94],"tasks":[97,109,221,234,246],"datasets,":[100],"also":[103,241],"support":[104],"zero-shot":[105,248],"prediction":[106],"new":[108,244],"with":[110,226,247],"zero":[111],"labeled":[112],"matching/non-matching":[113],"pairs.":[114],"However,":[115],"building":[116],"challenging":[122],"due":[123],"heterogeneous":[125],"formats":[126],"input":[128],"various":[132],"semantics":[134,178],"To":[138,175],"address":[139],"challenges,":[141],"employs":[143],"one":[144],"generic":[145],"Encoder":[146],"converts":[148],"any":[149],"pair":[150],"(a,":[154],"b)":[155],"into":[156,192],"representation,":[159],"uses":[161],"Matcher,":[163],"binary":[167],"classifier,":[168],"decide":[170],"matches":[173],"b.":[174],"align":[176],"tasks,":[181,208],"adopts":[183],"mixture-of-experts":[185],"enhances":[188],"representation":[191],"better":[194,217],"representation.":[195],"We":[196],"conduct":[197],"extensive":[198],"experiments":[199],"using":[200],"20":[201],"on":[203,219,223],"seven":[204],"well-studied":[205],"find":[210],"our":[212],"achieve":[216],"performance":[218],"most":[220],"average,":[224],"compared":[225],"state-of-the-art":[228],"specific":[229],"models":[230],"trained":[231],"ad-hoc":[233],"separately.":[237],"Moreover,":[238],"well":[242],"serve":[243],"learning.":[249]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
