{"id":"https://openalex.org/W4408189963","doi":"https://doi.org/10.3390/a18030150","title":"Real-Time Fuzzy Record-Matching Similarity Metric and Optimal Q-Gram Filter","display_name":"Real-Time Fuzzy Record-Matching Similarity Metric and Optimal Q-Gram Filter","publication_year":2025,"publication_date":"2025-03-06","ids":{"openalex":"https://openalex.org/W4408189963","doi":"https://doi.org/10.3390/a18030150"},"language":"en","primary_location":{"id":"doi:10.3390/a18030150","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a18030150","pdf_url":"https://www.mdpi.com/1999-4893/18/3/150/pdf?version=1741259467","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/18/3/150/pdf?version=1741259467","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013724754","display_name":"Ond\u0159ej Rozinek","orcid":"https://orcid.org/0000-0003-2248-7940"},"institutions":[{"id":"https://openalex.org/I44504214","display_name":"Czech Technical University in Prague","ror":"https://ror.org/03kqpb082","country_code":"CZ","type":"education","lineage":["https://openalex.org/I44504214"]},{"id":"https://openalex.org/I140744787","display_name":"University of Pardubice","ror":"https://ror.org/01chzd453","country_code":"CZ","type":"education","lineage":["https://openalex.org/I140744787"]}],"countries":["CZ"],"is_corresponding":true,"raw_author_name":"Ond\u0159ej Rozinek","raw_affiliation_strings":["Department of Information Technology, Faculty of Electrical Engineering and Informatics, University of Pardubice, Studentsk\u00e1 95, 532 10 Pardubice, Czech Republic","Department of Software Engineering, Faculty of Information Technology, Czech Technical University in Prague, Th\u00e1kurova 9, 166 34 Prague, Czech Republic","Rozinet s.r.o., U Josefa 110, 532 10 Pardubice, Czech Republic"],"affiliations":[{"raw_affiliation_string":"Department of Information Technology, Faculty of Electrical Engineering and Informatics, University of Pardubice, Studentsk\u00e1 95, 532 10 Pardubice, Czech Republic","institution_ids":["https://openalex.org/I140744787"]},{"raw_affiliation_string":"Department of Software Engineering, Faculty of Information Technology, Czech Technical University in Prague, Th\u00e1kurova 9, 166 34 Prague, Czech Republic","institution_ids":["https://openalex.org/I44504214"]},{"raw_affiliation_string":"Rozinet s.r.o., U Josefa 110, 532 10 Pardubice, Czech Republic","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083578730","display_name":"Jaroslav Marek","orcid":"https://orcid.org/0000-0002-5712-8852"},"institutions":[{"id":"https://openalex.org/I140744787","display_name":"University of Pardubice","ror":"https://ror.org/01chzd453","country_code":"CZ","type":"education","lineage":["https://openalex.org/I140744787"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jaroslav Marek","raw_affiliation_strings":["Department of Automation and Mathematics, Faculty of Electrical Engineering and Informatics, University of Pardubice, Studentsk\u00e1 95, 532 10 Pardubice, Czech Republic"],"affiliations":[{"raw_affiliation_string":"Department of Automation and Mathematics, Faculty of Electrical Engineering and Informatics, University of Pardubice, Studentsk\u00e1 95, 532 10 Pardubice, Czech Republic","institution_ids":["https://openalex.org/I140744787"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088997643","display_name":"Jan Panu\u0161","orcid":"https://orcid.org/0000-0003-3190-7310"},"institutions":[{"id":"https://openalex.org/I140744787","display_name":"University of Pardubice","ror":"https://ror.org/01chzd453","country_code":"CZ","type":"education","lineage":["https://openalex.org/I140744787"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jan Panu\u0161","raw_affiliation_strings":["Department of Information Technology, Faculty of Electrical Engineering and Informatics, University of Pardubice, Studentsk\u00e1 95, 532 10 Pardubice, Czech Republic"],"affiliations":[{"raw_affiliation_string":"Department of Information Technology, Faculty of Electrical Engineering and Informatics, University of Pardubice, Studentsk\u00e1 95, 532 10 Pardubice, Czech Republic","institution_ids":["https://openalex.org/I140744787"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027757741","display_name":"Jan Mare\u0161","orcid":"https://orcid.org/0000-0003-4693-2519"},"institutions":[{"id":"https://openalex.org/I9073902","display_name":"University of Chemistry and Technology, Prague","ror":"https://ror.org/05ggn0a85","country_code":"CZ","type":"education","lineage":["https://openalex.org/I9073902"]},{"id":"https://openalex.org/I140744787","display_name":"University of Pardubice","ror":"https://ror.org/01chzd453","country_code":"CZ","type":"education","lineage":["https://openalex.org/I140744787"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jan Mare\u0161","raw_affiliation_strings":["Department of Automation and Mathematics, Faculty of Electrical Engineering and Informatics, University of Pardubice, Studentsk\u00e1 95, 532 10 Pardubice, Czech Republic","Department of Mathematics, Informatics and Cybernetics, University of Chemistry and Technology Prague, Technicka 5, 166 28 Prague, Czech Republic"],"affiliations":[{"raw_affiliation_string":"Department of Automation and Mathematics, Faculty of Electrical Engineering and Informatics, University of Pardubice, Studentsk\u00e1 95, 532 10 Pardubice, Czech Republic","institution_ids":["https://openalex.org/I140744787"]},{"raw_affiliation_string":"Department of Mathematics, Informatics and Cybernetics, University of Chemistry and Technology Prague, Technicka 5, 166 28 Prague, Czech Republic","institution_ids":["https://openalex.org/I9073902"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5013724754"],"corresponding_institution_ids":["https://openalex.org/I140744787","https://openalex.org/I44504214"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":2.1724,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.84542532,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"18","issue":"3","first_page":"150","last_page":"150"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9847000241279602,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9775999784469604,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5991876721382141},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5949799418449402},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5266532897949219},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.5239418148994446},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.497548371553421},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.48982688784599304},{"id":"https://openalex.org/keywords/gram","display_name":"Gram","score":0.4603092670440674},{"id":"https://openalex.org/keywords/fuzzy-logic","display_name":"Fuzzy logic","score":0.44734689593315125},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.41320428252220154},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3615705966949463},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3527422547340393},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.34746748208999634},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.20378336310386658},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.16482669115066528},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.06236922740936279}],"concepts":[{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5991876721382141},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5949799418449402},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5266532897949219},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.5239418148994446},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.497548371553421},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.48982688784599304},{"id":"https://openalex.org/C161369605","wikidata":"https://www.wikidata.org/wiki/Q41803","display_name":"Gram","level":3,"score":0.4603092670440674},{"id":"https://openalex.org/C58166","wikidata":"https://www.wikidata.org/wiki/Q224821","display_name":"Fuzzy logic","level":2,"score":0.44734689593315125},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.41320428252220154},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3615705966949463},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3527422547340393},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34746748208999634},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.20378336310386658},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.16482669115066528},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.06236922740936279},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C523546767","wikidata":"https://www.wikidata.org/wiki/Q10876","display_name":"Bacteria","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/a18030150","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a18030150","pdf_url":"https://www.mdpi.com/1999-4893/18/3/150/pdf?version=1741259467","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:512ce68897f245b28f6d9af6e3cb1539","is_oa":true,"landing_page_url":"https://doaj.org/article/512ce68897f245b28f6d9af6e3cb1539","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 18, Iss 3, p 150 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/a18030150","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a18030150","pdf_url":"https://www.mdpi.com/1999-4893/18/3/150/pdf?version=1741259467","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4408189963.pdf"},"referenced_works_count":59,"referenced_works":["https://openalex.org/W41404523","https://openalex.org/W1590200387","https://openalex.org/W1593185626","https://openalex.org/W1646278814","https://openalex.org/W1647671624","https://openalex.org/W1980804566","https://openalex.org/W1998048394","https://openalex.org/W2031250218","https://openalex.org/W2034190452","https://openalex.org/W2039001182","https://openalex.org/W2041670460","https://openalex.org/W2043481183","https://openalex.org/W2047281923","https://openalex.org/W2057900969","https://openalex.org/W2065259291","https://openalex.org/W2066792529","https://openalex.org/W2068448872","https://openalex.org/W2074231493","https://openalex.org/W2074353216","https://openalex.org/W2087064593","https://openalex.org/W2102443632","https://openalex.org/W2111643688","https://openalex.org/W2123142779","https://openalex.org/W2123241698","https://openalex.org/W2125980212","https://openalex.org/W2135223301","https://openalex.org/W2141461755","https://openalex.org/W2149342630","https://openalex.org/W2150698190","https://openalex.org/W2156279557","https://openalex.org/W2158919277","https://openalex.org/W2163993443","https://openalex.org/W2170478076","https://openalex.org/W2222512263","https://openalex.org/W2286724461","https://openalex.org/W2472512514","https://openalex.org/W2485599079","https://openalex.org/W2577842110","https://openalex.org/W2605047533","https://openalex.org/W2606791715","https://openalex.org/W2765285316","https://openalex.org/W2902120922","https://openalex.org/W2931479232","https://openalex.org/W2992232047","https://openalex.org/W2999282340","https://openalex.org/W3105241684","https://openalex.org/W3132898468","https://openalex.org/W3146259567","https://openalex.org/W4238945254","https://openalex.org/W4396941164","https://openalex.org/W4396941222","https://openalex.org/W6631166905","https://openalex.org/W6635148446","https://openalex.org/W6636915900","https://openalex.org/W6663994553","https://openalex.org/W6667260149","https://openalex.org/W6682339282","https://openalex.org/W6771752040","https://openalex.org/W6931301004"],"related_works":["https://openalex.org/W2906970013","https://openalex.org/W3126081632","https://openalex.org/W2625039379","https://openalex.org/W2088254117","https://openalex.org/W4254593385","https://openalex.org/W2790582133","https://openalex.org/W4309488207","https://openalex.org/W1901380241","https://openalex.org/W311963822","https://openalex.org/W2789473152"],"abstract_inverted_index":{"In":[0,84,130],"this":[1],"paper,":[2],"we":[3,57,88,187],"introduce":[4],"an":[5,90,96],"advanced":[6],"Fuzzy":[7],"Record":[8],"Similarity":[9],"Metric":[10],"(FRMS)":[11],"that":[12,68],"improves":[13],"approximate":[14,63,216],"record":[15,22,148],"matching":[16,65,153,218],"and":[17,50,66,78,143,171,198,212,219],"models":[18,144],"human":[19,145],"perception":[20,146],"of":[21,74,120,128,140,147,178,191,194,201],"similarity.":[23],"The":[24,107,158],"FRMS":[25,134,159],"utilizes":[26],"a":[27,37,41,59,113,124,137,155,184,202,209],"newly":[28],"developed":[29],"similarity":[30,149],"space":[31],"with":[32,36,44,123,221],"favorable":[33],"properties":[34],"combined":[35],"metric":[38],"space,":[39],"employing":[40],"bag-of-words":[42],"model":[43],"general":[45],"applications":[46,163],"in":[47,72,154,164],"text":[48],"mining":[49],"cluster":[51],"analysis.":[52],"To":[53],"optimize":[54],"the":[55,85,131,179,189,192,195,199],"FRMS,":[56],"propose":[58],"two-stage":[60],"method":[61,207],"for":[62,100,136],"string":[64,217],"search":[67,220],"outperforms":[69],"baseline":[70],"methods":[71],"terms":[73],"average":[75],"time":[76,126,139],"complexity":[77,127],"F":[79],"measure":[80],"on":[81],"various":[82],"datasets.":[83],"first":[86],"stage,":[87,133],"construct":[89],"optimal":[91,97],"Q-gram":[92,109,196],"count":[93,110],"filter":[94,111,197],"as":[95,105,169],"lower":[98],"bound":[99],"fuzzy":[101],"token":[102],"similarities":[103],"such":[104,168],"FRMS.":[106],"approximated":[108],"achieves":[112],"high":[114],"accuracy":[115],"rate,":[116],"filtering":[117],"over":[118],"99%":[119],"dissimilar":[121],"records,":[122],"constant":[125],"\u2248O(1).":[129],"second":[132],"runs":[135],"polynomial":[138],"approximately":[141],"\u2248O(n4)":[142],"by":[150,176],"maximum":[151],"weight":[152],"bipartite":[156],"graph.":[157],"architecture":[160],"has":[161,172],"widespread":[162],"structured":[165],"document":[166],"storage":[167],"databases":[170],"already":[173],"been":[174],"commercialized":[175],"one":[177],"largest":[180],"IT":[181],"companies.":[182],"As":[183],"side":[185],"result,":[186],"explain":[188],"behavior":[190],"singularity":[193],"advantages":[200],"padding":[203],"extension.":[204],"Overall,":[205],"our":[206],"provides":[208],"more":[210],"accurate":[211],"efficient":[213],"approach":[214],"to":[215],"real-time":[222],"runtime.":[223]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-02-27T16:54:17.756197","created_date":"2025-10-10T00:00:00"}
