{"id":"https://openalex.org/W2158107206","doi":"https://doi.org/10.1145/2480362.2480500","title":"Model words-driven approaches for duplicate detection on the web","display_name":"Model words-driven approaches for duplicate detection on the web","publication_year":2013,"publication_date":"2013-03-18","ids":{"openalex":"https://openalex.org/W2158107206","doi":"https://doi.org/10.1145/2480362.2480500","mag":"2158107206"},"language":"en","primary_location":{"id":"doi:10.1145/2480362.2480500","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2480362.2480500","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th Annual ACM Symposium on Applied Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000886486","display_name":"Marnix de Bakker","orcid":null},"institutions":[{"id":"https://openalex.org/I913958620","display_name":"Erasmus University Rotterdam","ror":"https://ror.org/057w15z03","country_code":"NL","type":"education","lineage":["https://openalex.org/I913958620"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Marnix de Bakker","raw_affiliation_strings":["Erasmus University Rotterdam, Rotterdam, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Erasmus University Rotterdam, Rotterdam, The Netherlands","institution_ids":["https://openalex.org/I913958620"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005312918","display_name":"Damir Vandi\u0107","orcid":null},"institutions":[{"id":"https://openalex.org/I913958620","display_name":"Erasmus University Rotterdam","ror":"https://ror.org/057w15z03","country_code":"NL","type":"education","lineage":["https://openalex.org/I913958620"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Damir Vandic","raw_affiliation_strings":["Erasmus University Rotterdam, Rotterdam, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Erasmus University Rotterdam, Rotterdam, The Netherlands","institution_ids":["https://openalex.org/I913958620"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044867921","display_name":"Flavius Fr\u0103sincar","orcid":"https://orcid.org/0000-0002-8031-758X"},"institutions":[{"id":"https://openalex.org/I913958620","display_name":"Erasmus University Rotterdam","ror":"https://ror.org/057w15z03","country_code":"NL","type":"education","lineage":["https://openalex.org/I913958620"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Flavius Frasincar","raw_affiliation_strings":["Erasmus University Rotterdam, Rotterdam, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Erasmus University Rotterdam, Rotterdam, The Netherlands","institution_ids":["https://openalex.org/I913958620"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080242338","display_name":"Uzay Kaymak","orcid":"https://orcid.org/0000-0002-4500-9098"},"institutions":[{"id":"https://openalex.org/I83019370","display_name":"Eindhoven University of Technology","ror":"https://ror.org/02c2kyt77","country_code":"NL","type":"education","lineage":["https://openalex.org/I83019370"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Uzay Kaymak","raw_affiliation_strings":["Eindhoven University of Technology, Eindhoven, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Eindhoven University of Technology, Eindhoven, The Netherlands","institution_ids":["https://openalex.org/I83019370"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5000886486"],"corresponding_institution_ids":["https://openalex.org/I913958620"],"apc_list":null,"apc_paid":null,"fwci":1.2803,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.84033064,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"717","last_page":"723"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.7690683603286743},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7448118925094604},{"id":"https://openalex.org/keywords/product","display_name":"Product (mathematics)","score":0.7146451473236084},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.704096794128418},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5815646648406982},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4642553925514221},{"id":"https://openalex.org/keywords/order","display_name":"Order (exchange)","score":0.4323255121707916},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4205821752548218},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14852753281593323},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.11290672421455383},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.08387070894241333}],"concepts":[{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.7690683603286743},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7448118925094604},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.7146451473236084},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.704096794128418},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5815646648406982},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4642553925514221},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.4323255121707916},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4205821752548218},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14852753281593323},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.11290672421455383},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.08387070894241333},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2480362.2480500","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2480362.2480500","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th Annual ACM Symposium on Applied Computing","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.eur.nl:openaire_cris_publications/f329640a-d802-4c06-98e0-5824f133d270","is_oa":false,"landing_page_url":"https://pure.eur.nl/en/publications/f329640a-d802-4c06-98e0-5824f133d270","pdf_url":null,"source":{"id":"https://openalex.org/S4306401266","display_name":"EUR Research Repository (Erasmus University Rotterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I913958620","host_organization_name":"Erasmus University Rotterdam","host_organization_lineage":["https://openalex.org/I913958620"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"de Bakker, M, Vandic, D, Frasincar, F & Kaymak, U 2013, Model Words-Driven Approaches for Duplicate Detection on the Web. in 28th Annual ACM Symposium on Applied Computing (SAC 2013). Association for Computing Machinery (ACM), pp. 717-723. https://doi.org/10.1145/2480362.2480500","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.973.81","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.973.81","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://damirvandic.com/wp-content/papercite-data/pdf/sac_2013.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1647671624","https://openalex.org/W1956559956","https://openalex.org/W2034190452","https://openalex.org/W2043437843","https://openalex.org/W2065259291","https://openalex.org/W2102443632","https://openalex.org/W2119821739","https://openalex.org/W2147458937","https://openalex.org/W2164456230","https://openalex.org/W2409362607","https://openalex.org/W2997181101","https://openalex.org/W4252684946","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W4255837520","https://openalex.org/W2387011115","https://openalex.org/W4234808182","https://openalex.org/W2382043075","https://openalex.org/W2809151339","https://openalex.org/W2216913934","https://openalex.org/W2360673138","https://openalex.org/W2809370583","https://openalex.org/W2389748081","https://openalex.org/W2775171296"],"abstract_inverted_index":{"The":[0,51,79],"detection":[1],"of":[2,7,28,134,147],"product":[3,14,29,45,49,63,75,89,101],"duplicates":[4],"is":[5,77,103],"one":[6],"the":[8,26,40,122,128,137,141],"many":[9],"challenges":[10],"that":[11,38,121],"Web":[12,117],"shop":[13],"aggregators":[15],"are":[16],"facing.":[17],"This":[18],"paper":[19],"presents":[20],"two":[21,115],"new":[22],"methods":[23,33],"to":[24,47,59,68,94],"solve":[25],"problem":[27],"duplicate":[30,70,96],"detection.":[31],"Both":[32],"extend":[34],"a":[35],"state-of-the-art":[36,130,143],"approach":[37],"uses":[39,55],"found":[41],"model":[42,85],"words":[43,86],"in":[44,87,92,132,145],"titles":[46],"detect":[48],"duplicates.":[50],"first":[52,138],"proposed":[53,81,124],"method":[54,82,125,131,139,144],"several":[56],"distance":[57],"measures":[58],"calculate":[60],"distances":[61],"between":[62],"attribute":[64,90],"keys":[65],"and":[66],"values":[67,91],"find":[69,95],"products":[71,97],"when":[72,98],"no":[73,99],"matching":[74,84,100],"title":[76,102],"found.":[78,104],"second":[80,123],"detects":[83],"all":[88],"order":[93],"Based":[105],"on":[106,110],"our":[107],"experimental":[108],"results":[109],"real-world":[111],"data":[112],"gathered":[113],"from":[114],"existing":[116,129,142],"shops,":[118],"we":[119],"show":[120],"significantly":[126],"outperforms":[127,140],"terms":[133,146],"F1-measure,":[135,148],"while":[136],"but":[149],"not":[150],"significantly.":[151]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2014,"cited_by_count":3}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
