{"id":"https://openalex.org/W2105643880","doi":"https://doi.org/10.1145/2063576.2063616","title":"Frequency-aware similarity measures","display_name":"Frequency-aware similarity measures","publication_year":2011,"publication_date":"2011-10-24","ids":{"openalex":"https://openalex.org/W2105643880","doi":"https://doi.org/10.1145/2063576.2063616","mag":"2105643880"},"language":"en","primary_location":{"id":"doi:10.1145/2063576.2063616","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2063576.2063616","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th ACM international conference on Information and knowledge management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070190076","display_name":"Dustin Lange","orcid":null},"institutions":[{"id":"https://openalex.org/I143288331","display_name":"Hasso Plattner Institute","ror":"https://ror.org/058rn5r42","country_code":"DE","type":"facility","lineage":["https://openalex.org/I143288331","https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Dustin Lange","raw_affiliation_strings":["Hasso Plattner Institute, Potsdam, Germany"],"affiliations":[{"raw_affiliation_string":"Hasso Plattner Institute, Potsdam, Germany","institution_ids":["https://openalex.org/I143288331"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053028480","display_name":"Felix Naumann","orcid":"https://orcid.org/0000-0002-4483-1389"},"institutions":[{"id":"https://openalex.org/I143288331","display_name":"Hasso Plattner Institute","ror":"https://ror.org/058rn5r42","country_code":"DE","type":"facility","lineage":["https://openalex.org/I143288331","https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Felix Naumann","raw_affiliation_strings":["Hasso Plattner Institute, Potsdam, Germany"],"affiliations":[{"raw_affiliation_string":"Hasso Plattner Institute, Potsdam, Germany","institution_ids":["https://openalex.org/I143288331"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5070190076"],"corresponding_institution_ids":["https://openalex.org/I143288331"],"apc_list":null,"apc_paid":null,"fwci":1.2292,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.82003174,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"243","last_page":"248"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.982200026512146,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intuition","display_name":"Intuition","score":0.8161835670471191},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.7341235876083374},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6702860593795776},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6432572603225708},{"id":"https://openalex.org/keywords/similarity-measure","display_name":"Similarity measure","score":0.58380526304245},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4814143180847168},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.43124452233314514},{"id":"https://openalex.org/keywords/genetic-programming","display_name":"Genetic programming","score":0.42573487758636475},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42568379640579224},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.277042031288147},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.1359896957874298}],"concepts":[{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.8161835670471191},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.7341235876083374},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6702860593795776},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6432572603225708},{"id":"https://openalex.org/C2776517306","wikidata":"https://www.wikidata.org/wiki/Q29017317","display_name":"Similarity measure","level":2,"score":0.58380526304245},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4814143180847168},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.43124452233314514},{"id":"https://openalex.org/C110332635","wikidata":"https://www.wikidata.org/wiki/Q629498","display_name":"Genetic programming","level":2,"score":0.42573487758636475},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42568379640579224},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.277042031288147},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.1359896957874298},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2063576.2063616","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2063576.2063616","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th ACM international conference on Information and knowledge management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7200000286102295,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W46452414","https://openalex.org/W1576818901","https://openalex.org/W1964879903","https://openalex.org/W1972837706","https://openalex.org/W1982287794","https://openalex.org/W2040925009","https://openalex.org/W2067566391","https://openalex.org/W2100604298","https://openalex.org/W2107976925","https://openalex.org/W2108991785","https://openalex.org/W2133990480","https://openalex.org/W2147107544","https://openalex.org/W2148019918","https://openalex.org/W2162337786","https://openalex.org/W2164456230","https://openalex.org/W4238753141"],"related_works":["https://openalex.org/W2364252372","https://openalex.org/W4234066492","https://openalex.org/W1998063895","https://openalex.org/W2319693127","https://openalex.org/W308539617","https://openalex.org/W2072263576","https://openalex.org/W2474567666","https://openalex.org/W1940044583","https://openalex.org/W2806903871","https://openalex.org/W4320802053"],"abstract_inverted_index":{"Measuring":[0],"the":[1],"similarity":[2,20,29,138,151],"of":[3,25,94,149],"two":[4],"records":[5],"is":[6,43,57],"a":[7,35,44,136,147,159,172],"challenging":[8],"problem,":[9],"but":[10],"necessary":[11],"for":[12],"fundamental":[13],"tasks,":[14],"such":[15,74],"as":[16,75],"duplicate":[17],"detection":[18],"and":[19,123],"search.":[21],"By":[22],"exploiting":[23],"frequencies":[24,93],"attribute":[26,72,95],"values,":[27,73],"many":[28],"measures":[30,152],"can":[31],"be":[32],"improved:":[33],"In":[34],"person":[36],"table":[37],"with":[38,112,119,128,171],"U.S.":[39],"citizens,":[40],"Arnold":[41,52],"Schwarzenegger":[42],"very":[45,58],"rare":[46,113],"name.":[47],"If":[48],"we":[49,99,134,140,162],"find":[50],"several":[51],"Schwarzeneggers":[53],"in":[54],"it,":[55],"it":[56],"likely":[59],"that":[60],"these":[61],"are":[62,65],"duplicates.":[63],"We":[64,80,167],"then":[66],"less":[67],"strict":[68],"when":[69],"comparing":[70],"other":[71],"birth":[76],"date":[77],"or":[78],"address.":[79],"put":[81],"this":[82],"intuition":[83],"to":[84,92,145],"use":[85],"by":[86,176],"partitioning":[87,165],"compared":[88],"record":[89],"pairs":[90,111,118,127],"according":[91],"values.":[96],"For":[97,131],"example,":[98],"could":[100],"create":[101],"three":[102],"partitions":[103],"from":[104],"our":[105],"data:":[106],"Partition":[107,115,124],"1":[108],"contains":[109],"all":[110,117,126],"names,":[114,122],"2":[116],"medium":[120],"frequent":[121,129],"3":[125],"names.":[130],"each":[132],"partition,":[133],"learn":[135],"different":[137,164],"measure:":[139],"apply":[141],"machine":[142],"learning":[143],"techniques":[144],"combine":[146],"set":[148],"base":[150],"into":[153],"an":[154],"overall":[155],"measure.":[156],"To":[157],"determine":[158],"good":[160],"partitioning,":[161],"compare":[163],"strategies.":[166],"achieved":[168],"best":[169],"results":[170],"novel":[173],"algorithm":[174],"inspired":[175],"genetic":[177],"programming.":[178]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
