{"id":"https://openalex.org/W3033256305","doi":"https://doi.org/10.1145/3372278.3391933","title":"An Active Learning Framework for Duplicate Detection in SaaS Platforms","display_name":"An Active Learning Framework for Duplicate Detection in SaaS Platforms","publication_year":2020,"publication_date":"2020-06-02","ids":{"openalex":"https://openalex.org/W3033256305","doi":"https://doi.org/10.1145/3372278.3391933","mag":"3033256305"},"language":"en","primary_location":{"id":"doi:10.1145/3372278.3391933","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3372278.3391933","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043280322","display_name":"Quy Hy Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Quy H. Nguyen","raw_affiliation_strings":["AISIA Research Lab, Ho Chi Minh, Vietnam"],"affiliations":[{"raw_affiliation_string":"AISIA Research Lab, Ho Chi Minh, Vietnam","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025877753","display_name":"Dac Dang Khoa Nguyen","orcid":"https://orcid.org/0000-0003-3233-4972"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dac Nguyen","raw_affiliation_strings":["AISIA Research Lab, Ho Chi Minh, Vietnam"],"affiliations":[{"raw_affiliation_string":"AISIA Research Lab, Ho Chi Minh, Vietnam","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023083273","display_name":"Minh-Son Dao","orcid":"https://orcid.org/0000-0003-3044-8175"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Minh-Son Dao","raw_affiliation_strings":["National Institute of Information and Communications Technology, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064606251","display_name":"Duc\u2010Tien Dang\u2010Nguyen","orcid":"https://orcid.org/0000-0002-2761-2213"},"institutions":[{"id":"https://openalex.org/I4432739","display_name":"University of Bergen","ror":"https://ror.org/03zga2b32","country_code":"NO","type":"education","lineage":["https://openalex.org/I4432739"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Duc-Tien Dang-Nguyen","raw_affiliation_strings":["University of Bergen, Bergen, Norway"],"affiliations":[{"raw_affiliation_string":"University of Bergen, Bergen, Norway","institution_ids":["https://openalex.org/I4432739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014224452","display_name":"Cathal Gurrin","orcid":"https://orcid.org/0000-0003-2903-3968"},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Cathal Gurrin","raw_affiliation_strings":["Dublin City University, Dublin, Ireland"],"affiliations":[{"raw_affiliation_string":"Dublin City University, Dublin, Ireland","institution_ids":["https://openalex.org/I42934936"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051882105","display_name":"Binh T. Nguyen","orcid":"https://orcid.org/0000-0001-5249-9702"},"institutions":[{"id":"https://openalex.org/I23582244","display_name":"Ho Chi Minh City University of Science","ror":"https://ror.org/05jfbgm49","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023","https://openalex.org/I23582244"]},{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Binh T. Nguyen","raw_affiliation_strings":["AISIA Research Lab &amp; VNU HCM - University of Science, Ho Chi Minh, Vietnam"],"affiliations":[{"raw_affiliation_string":"AISIA Research Lab &amp; VNU HCM - University of Science, Ho Chi Minh, Vietnam","institution_ids":["https://openalex.org/I23582244","https://openalex.org/I123565023"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5043280322"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06991926,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"412","last_page":"415"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9779000282287598,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8578453063964844},{"id":"https://openalex.org/keywords/software-as-a-service","display_name":"Software as a service","score":0.7537954449653625},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.5673847198486328},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.483437180519104},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.44594869017601013},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.43035706877708435},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4257073402404785},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4172682464122772},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40991243720054626},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.2148720622062683}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8578453063964844},{"id":"https://openalex.org/C175133352","wikidata":"https://www.wikidata.org/wiki/Q1254596","display_name":"Software as a service","level":4,"score":0.7537954449653625},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.5673847198486328},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.483437180519104},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.44594869017601013},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.43035706877708435},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4257073402404785},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4172682464122772},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40991243720054626},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.2148720622062683},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3372278.3391933","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3372278.3391933","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:doras.dcu.ie:24631","is_oa":false,"landing_page_url":"http://doras.dcu.ie/24631/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401511","display_name":"Dublin City University Open Access Institutional Repository (Dublin City University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I42934936","host_organization_name":"Dublin City University","host_organization_lineage":["https://openalex.org/I42934936"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"  Nguyen, Quy H., Nguyen, Dac, Dao, Minh-Son, Dang-Nguyen, Duc-Tien ORCID: 0000-0002-2761-2213 &lt;https://orcid.org/0000-0002-2761-2213&gt;, Gurrin, Cathal ORCID: 0000-0003-2903-3968 &lt;https://orcid.org/0000-0003-2903-3968&gt; and Nguyen, Binh T.  (2020) An active learning framework for duplicate detection in SaaS platforms.  In: Proceedings of the 2020 International Conference on Multimedia Retrieval (ICMR '20), 26\u201329 Oct 2020, Dublin, Ireland.  ISBN 978-1-4503-7087-5     ","raw_type":"Conference or Workshop Item"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[{"id":"https://openalex.org/G3993907298","display_name":null,"funder_award_id":"13/RC/2106","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"},{"id":"https://openalex.org/G4053123157","display_name":null,"funder_award_id":"SFI/13/RC/2106","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"}],"funders":[{"id":"https://openalex.org/F4320320847","display_name":"Science Foundation Ireland","ror":"https://ror.org/0271asj38"},{"id":"https://openalex.org/F4320324891","display_name":"Iran Telecommunication Research Center","ror":"https://ror.org/01a3g2z22"},{"id":"https://openalex.org/F4320335839","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W2034190452","https://openalex.org/W2043481183","https://openalex.org/W2106053110","https://openalex.org/W2108991785","https://openalex.org/W2168440643","https://openalex.org/W2170881581","https://openalex.org/W2325939864","https://openalex.org/W2508865106","https://openalex.org/W2510940142","https://openalex.org/W2903158431","https://openalex.org/W2963588253","https://openalex.org/W2979417040","https://openalex.org/W2984303785","https://openalex.org/W3146259567","https://openalex.org/W3159676894"],"related_works":["https://openalex.org/W1864280877","https://openalex.org/W3173750053","https://openalex.org/W4206347313","https://openalex.org/W2051861582","https://openalex.org/W2186039352","https://openalex.org/W2170955077","https://openalex.org/W2098734934","https://openalex.org/W2794238232","https://openalex.org/W2371222978","https://openalex.org/W1757483628"],"abstract_inverted_index":{"With":[0],"the":[1,22,38,44,64,79,88,95,100,156,165,172,179,188],"rapid":[2],"growth":[3],"of":[4,26,41,94,102,196],"users'":[5],"data":[6,27,162],"in":[7,28,130,209],"SaaS":[8,132],"(Software-as-a-service)":[9],"platforms":[10],"using":[11,83],"micro-services,":[12],"it":[13,61],"becomes":[14],"essential":[15],"to":[16,37,50,98,158,182],"detect":[17],"duplicated":[18,113],"entities":[19,104],"for":[20,71,78,109,124,142,163,187,206],"ensuring":[21],"integrity":[23],"and":[24,31,144],"consistency":[25],"many":[29],"companies":[30],"businesses":[32],"(primarily":[33],"multinational":[34],"corporations).":[35],"Due":[36],"large":[39],"volume":[40],"databases":[42],"today,":[43],"expected":[45],"duplicate":[46,127,146],"detection":[47,65,81,128,147,200],"algorithms":[48,77],"need":[49],"be":[51],"not":[52,153],"only":[53,154],"accurate":[54],"but":[55,169],"also":[56,170],"practical,":[57],"which":[58,202],"means":[59],"that":[60],"can":[62,203],"release":[63],"results":[66],"as":[67,69,176,178],"fast":[68],"possible":[70,112],"a":[72,121,126,131,137,193],"given":[73],"request.":[74],"Among":[75],"existing":[76],"deduplicate":[80,199],"problem,":[82],"Siamese":[84,173],"neural":[85,174],"networks":[86,175],"with":[87],"triplet":[89,180],"loss":[90,181],"has":[91],"become":[92],"one":[93],"robust":[96],"ways":[97],"measure":[99],"similarity":[101],"two":[103],"(texts,":[105],"paragraphs,":[106],"or":[107],"documents)":[108],"identifying":[110],"all":[111],"items.":[114],"In":[115,149],"this":[116,150],"paper,":[117],"we":[118,135,152,191],"first":[119],"propose":[120],"practical":[122],"framework":[123],"building":[125],"system":[129],"platform.":[133],"Second,":[134],"present":[136],"new":[138],"active":[139],"learning":[140,167],"schema":[141],"training":[143],"updating":[145],"algorithms.":[148],"schema,":[151],"allow":[155],"crowd":[157],"provide":[159],"more":[160],"annotated":[161],"enhancing":[164],"chosen":[166],"model":[168,186],"use":[171],"well":[177],"construct":[183],"an":[184],"efficient":[185],"problem.":[189],"Finally,":[190],"design":[192],"user":[194],"interface":[195],"our":[197],"proposed":[198],"system,":[201],"easily":[204],"apply":[205],"empirical":[207],"applications":[208],"different":[210],"companies.":[211]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
