{"id":"https://openalex.org/W3034049032","doi":"https://doi.org/10.1145/3379174.3392319","title":"Duplicate Identification Algorithms in SaaS Platforms","display_name":"Duplicate Identification Algorithms in SaaS Platforms","publication_year":2020,"publication_date":"2020-06-07","ids":{"openalex":"https://openalex.org/W3034049032","doi":"https://doi.org/10.1145/3379174.3392319","mag":"3034049032"},"language":"en","primary_location":{"id":"doi:10.1145/3379174.3392319","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3379174.3392319","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 ACM Workshop on Intelligent Cross-Data Analysis and Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025877753","display_name":"Dac Dang Khoa Nguyen","orcid":"https://orcid.org/0000-0003-3233-4972"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dac Nguyen","raw_affiliation_strings":["AISIA Research Lab, Ho Chi Minh, Vietnam"],"affiliations":[{"raw_affiliation_string":"AISIA Research Lab, Ho Chi Minh, Vietnam","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043280322","display_name":"Quy Hy Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Quy H. Nguyen","raw_affiliation_strings":["AISIA Research Lab, Ho Chi Minh, Vietnam"],"affiliations":[{"raw_affiliation_string":"AISIA Research Lab, Ho Chi Minh, Vietnam","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023083273","display_name":"Minh-Son Dao","orcid":"https://orcid.org/0000-0003-3044-8175"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Minh-Son Dao","raw_affiliation_strings":["National Institute of Information and Communications Technology, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064606251","display_name":"Duc\u2010Tien Dang\u2010Nguyen","orcid":"https://orcid.org/0000-0002-2761-2213"},"institutions":[{"id":"https://openalex.org/I4432739","display_name":"University of Bergen","ror":"https://ror.org/03zga2b32","country_code":"NO","type":"education","lineage":["https://openalex.org/I4432739"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Duc-Tien Dang-Nguyen","raw_affiliation_strings":["University of Bergen, Bergen, Norway"],"affiliations":[{"raw_affiliation_string":"University of Bergen, Bergen, Norway","institution_ids":["https://openalex.org/I4432739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014224452","display_name":"Cathal Gurrin","orcid":"https://orcid.org/0000-0003-2903-3968"},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Cathal Gurrin","raw_affiliation_strings":["Dublin City University, Dublin, Ireland"],"affiliations":[{"raw_affiliation_string":"Dublin City University, Dublin, Ireland","institution_ids":["https://openalex.org/I42934936"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051882105","display_name":"Binh T. Nguyen","orcid":"https://orcid.org/0000-0001-5249-9702"},"institutions":[{"id":"https://openalex.org/I23582244","display_name":"Ho Chi Minh City University of Science","ror":"https://ror.org/05jfbgm49","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023","https://openalex.org/I23582244"]},{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Binh T. Nguyen","raw_affiliation_strings":["VNU HCM - University of Science, Ho Chi Minh, Vietnam"],"affiliations":[{"raw_affiliation_string":"VNU HCM - University of Science, Ho Chi Minh, Vietnam","institution_ids":["https://openalex.org/I23582244","https://openalex.org/I123565023"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5025877753"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.07275382,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"33","last_page":"38"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-as-a-service","display_name":"Software as a service","score":0.8725559115409851},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8258529305458069},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6335103511810303},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5688296556472778},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4825981855392456},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4541563093662262},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.45307278633117676},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.4408545196056366},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36684131622314453},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3448759913444519},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.329349160194397},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.12064051628112793},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.1177850067615509}],"concepts":[{"id":"https://openalex.org/C175133352","wikidata":"https://www.wikidata.org/wiki/Q1254596","display_name":"Software as a service","level":4,"score":0.8725559115409851},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8258529305458069},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6335103511810303},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5688296556472778},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4825981855392456},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4541563093662262},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.45307278633117676},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.4408545196056366},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36684131622314453},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3448759913444519},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.329349160194397},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.12064051628112793},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.1177850067615509},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3379174.3392319","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3379174.3392319","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 ACM Workshop on Intelligent Cross-Data Analysis and Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:doras.dcu.ie:24667","is_oa":false,"landing_page_url":"http://doras.dcu.ie/24667/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401511","display_name":"Dublin City University Open Access Institutional Repository (Dublin City University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I42934936","host_organization_name":"Dublin City University","host_organization_lineage":["https://openalex.org/I42934936"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"  Nguyen, Dac, Nguyen, Quy H., Dao, Minh-Son, Dang-Nguyen, Duc-Tien ORCID: 0000-0002-2761-2213 &lt;https://orcid.org/0000-0002-2761-2213&gt;, Gurrin, Cathal ORCID: 0000-0003-2903-3968 &lt;https://orcid.org/0000-0003-2903-3968&gt; and Nguyen, Binh T.  (2020) Duplicate identification algorithms in SaaS platforms.  In: 2020 Intelligent Cross-Data Analysis and Retrieval Workshop (ICDAR'20), 20-26 Oct 2020, Dublin, Ireland.  ISBN 978-1-4503-7509-2     ","raw_type":"Conference or Workshop Item"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5099999904632568,"display_name":"Partnerships for the goals","id":"https://metadata.un.org/sdg/17"}],"awards":[{"id":"https://openalex.org/G3993907298","display_name":null,"funder_award_id":"13/RC/2106","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"},{"id":"https://openalex.org/G4053123157","display_name":null,"funder_award_id":"SFI/13/RC/2106","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"}],"funders":[{"id":"https://openalex.org/F4320320847","display_name":"Science Foundation Ireland","ror":"https://ror.org/0271asj38"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2034190452","https://openalex.org/W2043481183","https://openalex.org/W2168440643","https://openalex.org/W2171590421","https://openalex.org/W2612342472","https://openalex.org/W2798457541","https://openalex.org/W2798649495","https://openalex.org/W2810509939","https://openalex.org/W2894176037","https://openalex.org/W2954406561","https://openalex.org/W3091905774"],"related_works":["https://openalex.org/W4321258516","https://openalex.org/W2051833850","https://openalex.org/W4287845917","https://openalex.org/W3156164993","https://openalex.org/W1864280877","https://openalex.org/W3173750053","https://openalex.org/W2385015894","https://openalex.org/W4206347313","https://openalex.org/W2171573941","https://openalex.org/W2051861582"],"abstract_inverted_index":{"Existing":[0],"duplicate":[1,21],"records":[2],"is":[3],"one":[4,25],"of":[5,99],"the":[6,20,37,50,60,94,140],"most":[7],"common":[8],"issues":[9],"in":[10,24,54,97,118,139],"many":[11],"Software-as-as-Service":[12],"(SaaS)":[13],"platforms.":[14],"In":[15],"this":[16],"paper,":[17],"we":[18,64],"study":[19],"identification":[22],"problem":[23],"specific":[26],"SaaS":[27,61],"platform":[28],"related":[29,141],"to":[30,125,135],"quality":[31],"and":[32,89,102,131],"compliance":[33],"management":[34],"by":[35,68],"using":[36,93,109],"address":[38],"information.":[39],"We":[40,77,123],"interpret":[41],"all":[42,132],"typical":[43],"mistakes":[44],"from":[45,59,72],"users":[46],"that":[47,108],"can":[48,113],"generate":[49],"existent":[51],"duplicated":[52],"organizations":[53],"a":[55,115],"given":[56],"dataset,":[57],"collected":[58],"platform.":[62],"Also,":[63],"create":[65],"another":[66],"set":[67],"crawling":[69],"location":[70],"data":[71],"Open":[73,128],"Address":[74,129],"(US":[75],"Zone).":[76],"compare":[78],"different":[79],"methods,":[80],"including":[81],"Bag-of-words":[82],"(using":[83],"Cosine":[84],"Distance),":[85],"Record":[86],"Linkage":[87],"Toolkits,":[88],"Siamese":[90,110],"Neural":[91,111],"Networks":[92,112],"triplet":[95],"loss,":[96],"terms":[98],"precision,":[100],"recall,":[101],"F1-score.":[103],"The":[104],"experimental":[105],"results":[106],"show":[107],"achieve":[114],"better":[116],"performance":[117],"comparison":[119],"with":[120],"other":[121],"techniques.":[122],"plan":[124],"publish":[126],"our":[127],"dataset":[130],"implementation":[133],"codes":[134],"facilitate":[136],"further":[137],"research":[138],"fields.":[142]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
