{"id":"https://openalex.org/W2203592669","doi":"https://doi.org/10.1109/bigdata.2015.7364061","title":"CrowdMD: Crowdsourcing-based approach for deduplication","display_name":"CrowdMD: Crowdsourcing-based approach for deduplication","publication_year":2015,"publication_date":"2015-10-01","ids":{"openalex":"https://openalex.org/W2203592669","doi":"https://doi.org/10.1109/bigdata.2015.7364061","mag":"2203592669"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2015.7364061","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7364061","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046968208","display_name":"Asma Abboura","orcid":null},"institutions":[{"id":"https://openalex.org/I137407579","display_name":"Universit\u00e9 Oran 1 Ahmed Ben Bella","ror":"https://ror.org/059et2b68","country_code":"DZ","type":"education","lineage":["https://openalex.org/I137407579"]}],"countries":["DZ"],"is_corresponding":true,"raw_author_name":"Asma Abboura","raw_affiliation_strings":["RIIR Laboratory, University of Oran 1, Oran, Algeria"],"affiliations":[{"raw_affiliation_string":"RIIR Laboratory, University of Oran 1, Oran, Algeria","institution_ids":["https://openalex.org/I137407579"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067847705","display_name":"Soror Sahrl","orcid":null},"institutions":[{"id":"https://openalex.org/I110736937","display_name":"D\u00e9l\u00e9gation Paris 5","ror":"https://ror.org/02e0y6e06","country_code":"FR","type":"government","lineage":["https://openalex.org/I110736937","https://openalex.org/I154526488"]},{"id":"https://openalex.org/I204730241","display_name":"Universit\u00e9 Paris Cit\u00e9","ror":"https://ror.org/05f82e368","country_code":"FR","type":"education","lineage":["https://openalex.org/I204730241"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Soror Sahrl","raw_affiliation_strings":["Universit\u00e9 Paris Descartes Sorbonnes Paris Cit\u00e9, Paris, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Paris Descartes Sorbonnes Paris Cit\u00e9, Paris, France","institution_ids":["https://openalex.org/I110736937","https://openalex.org/I204730241"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086740653","display_name":"Mourad Ouziri","orcid":"https://orcid.org/0000-0003-1682-2781"},"institutions":[{"id":"https://openalex.org/I204730241","display_name":"Universit\u00e9 Paris Cit\u00e9","ror":"https://ror.org/05f82e368","country_code":"FR","type":"education","lineage":["https://openalex.org/I204730241"]},{"id":"https://openalex.org/I110736937","display_name":"D\u00e9l\u00e9gation Paris 5","ror":"https://ror.org/02e0y6e06","country_code":"FR","type":"government","lineage":["https://openalex.org/I110736937","https://openalex.org/I154526488"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Mourad Ouziri","raw_affiliation_strings":["Universit\u00e9 Paris Descartes Sorbonnes Paris Cit\u00e9, Paris, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Paris Descartes Sorbonnes Paris Cit\u00e9, Paris, France","institution_ids":["https://openalex.org/I110736937","https://openalex.org/I204730241"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073280092","display_name":"Salima Benbernou","orcid":"https://orcid.org/0000-0001-5687-8152"},"institutions":[{"id":"https://openalex.org/I110736937","display_name":"D\u00e9l\u00e9gation Paris 5","ror":"https://ror.org/02e0y6e06","country_code":"FR","type":"government","lineage":["https://openalex.org/I110736937","https://openalex.org/I154526488"]},{"id":"https://openalex.org/I204730241","display_name":"Universit\u00e9 Paris Cit\u00e9","ror":"https://ror.org/05f82e368","country_code":"FR","type":"education","lineage":["https://openalex.org/I204730241"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Salima Benbernou","raw_affiliation_strings":["Universit\u00e9 Paris Descartes Sorbonnes Paris Cit\u00e9, Paris, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Paris Descartes Sorbonnes Paris Cit\u00e9, Paris, France","institution_ids":["https://openalex.org/I110736937","https://openalex.org/I204730241"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5046968208"],"corresponding_institution_ids":["https://openalex.org/I137407579"],"apc_list":null,"apc_paid":null,"fwci":0.8012,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.7874113,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"2621","last_page":"2627"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8564674854278564},{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.7592867612838745},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6757470369338989},{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.6618159413337708},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.6520302295684814},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6385049223899841},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5656383037567139},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5579238533973694},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.543071985244751},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.48574382066726685},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.4284515082836151},{"id":"https://openalex.org/keywords/database-transaction","display_name":"Database transaction","score":0.41718754172325134},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.35773929953575134},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3488086760044098},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3406805396080017},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.1035841703414917}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8564674854278564},{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.7592867612838745},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6757470369338989},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.6618159413337708},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.6520302295684814},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6385049223899841},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5656383037567139},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5579238533973694},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.543071985244751},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.48574382066726685},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.4284515082836151},{"id":"https://openalex.org/C75949130","wikidata":"https://www.wikidata.org/wiki/Q848010","display_name":"Database transaction","level":2,"score":0.41718754172325134},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.35773929953575134},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3488086760044098},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3406805396080017},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.1035841703414917},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2015.7364061","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7364061","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W935072310","https://openalex.org/W1484413656","https://openalex.org/W1496255159","https://openalex.org/W1507626491","https://openalex.org/W1807762489","https://openalex.org/W1952481632","https://openalex.org/W1964786778","https://openalex.org/W2000794276","https://openalex.org/W2007343074","https://openalex.org/W2008420823","https://openalex.org/W2040263621","https://openalex.org/W2056748234","https://openalex.org/W2107966677","https://openalex.org/W2108991785","https://openalex.org/W2113878109","https://openalex.org/W2137479650","https://openalex.org/W2145346822","https://openalex.org/W2147805208","https://openalex.org/W2153813903","https://openalex.org/W2166549982","https://openalex.org/W2288244345","https://openalex.org/W2330963852","https://openalex.org/W3146259567","https://openalex.org/W6628750762","https://openalex.org/W6638705572","https://openalex.org/W6696516842","https://openalex.org/W7016357048","https://openalex.org/W7056835119"],"related_works":["https://openalex.org/W3032998312","https://openalex.org/W1503094549","https://openalex.org/W4384486036","https://openalex.org/W135177976","https://openalex.org/W2337920774","https://openalex.org/W4318823662","https://openalex.org/W2886410948","https://openalex.org/W2025875869","https://openalex.org/W3207526114","https://openalex.org/W4286908577"],"abstract_inverted_index":{"Matching":[0],"dependencies":[1],"(MDs)":[2],"were":[3],"recently":[4],"introduced":[5],"as":[6],"quality":[7,33],"rules":[8,17,34],"for":[9,68],"data":[10,92],"cleaning":[11],"and":[12,26,43,48,126],"entity":[13],"resolution.":[14],"They":[15],"are":[16],"that":[18],"specify":[19],"what":[20],"values":[21],"should":[22],"be":[23,29,130],"considered":[24],"duplicates,":[25],"have":[27],"to":[28,52,76,95,113],"matched.":[30],"Defining":[31],"such":[32],"on":[35],"a":[36,40,44,64,79,102],"database":[37],"instance,":[38],"is":[39],"very":[41],"expensive":[42],"time":[45],"consuming":[46],"process,":[47],"requires":[49],"huge":[50],"efforts":[51],"analyse":[53],"the":[54,74,109,118,122],"whole":[55],"database.":[56],"In":[57],"this":[58,133],"demo":[59],"paper,":[60],"we":[61,105],"present":[62],"CrowdMD,":[63],"hybrid":[65],"machine-crowd":[66],"system":[67],"generating":[69],"MDs.":[70],"It":[71],"first":[72],"asks":[73],"crowd":[75],"determine":[77],"whether":[78],"given":[80],"pair,":[81],"from":[82,132],"training":[83,119,134],"sample":[84,120],"pairs,":[85],"match":[86],"or":[87],"not.":[88],"Then,":[89],"it":[90],"uses":[91],"mining":[93],"techniques":[94],"generate":[96,114],"attributes":[97],"constituting":[98],"an":[99],"MD.":[100],"Using":[101],"Restaurant":[103],"database,":[104],"will":[106],"show":[107],"how":[108,127],"crowders":[110],"can":[111,129],"help":[112],"MDs":[115,128],"by":[116],"labelling":[117],"through":[121],"CrowdMD":[123],"user":[124],"interface":[125],"mined":[131],"set.":[135]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
