{"id":"https://openalex.org/W1988481267","doi":"https://doi.org/10.1045/november14-fedoryszak","title":"Efficient Blocking Method for a Large Scale Citation Matching","display_name":"Efficient Blocking Method for a Large Scale Citation Matching","publication_year":2014,"publication_date":"2014-11-01","ids":{"openalex":"https://openalex.org/W1988481267","doi":"https://doi.org/10.1045/november14-fedoryszak","mag":"1988481267"},"language":"en","primary_location":{"id":"doi:10.1045/november14-fedoryszak","is_oa":true,"landing_page_url":"https://doi.org/10.1045/november14-fedoryszak","pdf_url":null,"source":{"id":"https://openalex.org/S119508283","display_name":"D-Lib Magazine","issn_l":"1082-9873","issn":["1082-9873"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310316316","host_organization_name":"Corporation for National Research Initiatives","host_organization_lineage":["https://openalex.org/P4310316316"],"host_organization_lineage_names":["Corporation for National Research Initiatives"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"D-Lib Magazine","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1045/november14-fedoryszak","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080956757","display_name":"Mateusz Fedoryszak","orcid":null},"institutions":[{"id":"https://openalex.org/I4210124246","display_name":"CITIC Group (China)","ror":"https://ror.org/037b6wy35","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210124246"]},{"id":"https://openalex.org/I4654613","display_name":"University of Warsaw","ror":"https://ror.org/039bjqg32","country_code":"PL","type":"education","lineage":["https://openalex.org/I4654613"]}],"countries":["CN","PL"],"is_corresponding":true,"raw_author_name":"Mateusz Fedoryszak","raw_affiliation_strings":["citation \u2192 document","Interdisciplinary Centre for Mathematical and Computational Modelling at University of Warsaw"],"affiliations":[{"raw_affiliation_string":"citation \u2192 document","institution_ids":["https://openalex.org/I4210124246"]},{"raw_affiliation_string":"Interdisciplinary Centre for Mathematical and Computational Modelling at University of Warsaw","institution_ids":["https://openalex.org/I4654613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010356596","display_name":"\u0141ukasz Bolikowski","orcid":"https://orcid.org/0000-0003-1048-6051"},"institutions":[{"id":"https://openalex.org/I4210124246","display_name":"CITIC Group (China)","ror":"https://ror.org/037b6wy35","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210124246"]},{"id":"https://openalex.org/I4654613","display_name":"University of Warsaw","ror":"https://ror.org/039bjqg32","country_code":"PL","type":"education","lineage":["https://openalex.org/I4654613"]}],"countries":["CN","PL"],"is_corresponding":false,"raw_author_name":"\u0141ukasz Bolikowski","raw_affiliation_strings":["citation \u2192 document","Interdisciplinary Centre for Mathematical and Computational Modelling at University of Warsaw"],"affiliations":[{"raw_affiliation_string":"citation \u2192 document","institution_ids":["https://openalex.org/I4210124246"]},{"raw_affiliation_string":"Interdisciplinary Centre for Mathematical and Computational Modelling at University of Warsaw","institution_ids":["https://openalex.org/I4654613"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5080956757"],"corresponding_institution_ids":["https://openalex.org/I4210124246","https://openalex.org/I4654613"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.10068554,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"20","issue":"11/12","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9742000102996826,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.7747703194618225},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.522759199142456},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5197767615318298},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5190547108650208},{"id":"https://openalex.org/keywords/citation","display_name":"Citation","score":0.48334500193595886},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.16792890429496765},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15515369176864624},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.15490946173667908},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.11834597587585449},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.11413851380348206},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.08521676063537598}],"concepts":[{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.7747703194618225},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.522759199142456},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5197767615318298},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5190547108650208},{"id":"https://openalex.org/C2778805511","wikidata":"https://www.wikidata.org/wiki/Q1713","display_name":"Citation","level":2,"score":0.48334500193595886},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.16792890429496765},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15515369176864624},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.15490946173667908},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.11834597587585449},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.11413851380348206},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.08521676063537598}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1045/november14-fedoryszak","is_oa":true,"landing_page_url":"https://doi.org/10.1045/november14-fedoryszak","pdf_url":null,"source":{"id":"https://openalex.org/S119508283","display_name":"D-Lib Magazine","issn_l":"1082-9873","issn":["1082-9873"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310316316","host_organization_name":"Corporation for National Research Initiatives","host_organization_lineage":["https://openalex.org/P4310316316"],"host_organization_lineage_names":["Corporation for National Research Initiatives"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"D-Lib Magazine","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1045/november14-fedoryszak","is_oa":true,"landing_page_url":"https://doi.org/10.1045/november14-fedoryszak","pdf_url":null,"source":{"id":"https://openalex.org/S119508283","display_name":"D-Lib Magazine","issn_l":"1082-9873","issn":["1082-9873"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310316316","host_organization_name":"Corporation for National Research Initiatives","host_organization_lineage":["https://openalex.org/P4310316316"],"host_organization_lineage_names":["Corporation for National Research Initiatives"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"D-Lib Magazine","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2392835431","https://openalex.org/W1965371215","https://openalex.org/W2126932387","https://openalex.org/W1842396145","https://openalex.org/W2353762239","https://openalex.org/W2484966135","https://openalex.org/W2015050211","https://openalex.org/W2390279801","https://openalex.org/W2126435977"],"abstract_inverted_index":{"Most":[0],"commonly":[1],"the":[2],"first":[3],"part":[4],"of":[5,61,69],"record":[6],"deduplication":[7],"is":[8,24,45,74],"blocking.":[9],"During":[10],"this":[11],"phase,":[12],"roughly":[13],"similar":[14],"entities":[15],"are":[16,51,87],"grouped":[17],"into":[18],"blocks":[19],"where":[20],"more":[21],"exact":[22],"clustering":[23],"performed.":[25],"We":[26],"present":[27],"a":[28,56],"blocking":[29,39],"method":[30],"for":[31],"citation":[32,83],"matching":[33,84],"based":[34],"on":[35],"hash":[36,49,72],"functions.":[37],"A":[38,47],"workflow":[40,85],"implemented":[41],"in":[42],"Apache":[43],"Hadoop":[44],"outlined.":[46],"few":[48],"functions":[50,73],"proposed":[52],"and":[53],"compared":[54],"with":[55,64],"particular":[57],"concern":[58],"about":[59],"feasibility":[60],"their":[62],"usage":[63],"big":[65],"data.":[66],"The":[67],"possibility":[68],"combining":[70],"various":[71],"investigated.":[75],"Finally,":[76],"some":[77],"technical":[78],"details":[79],"related":[80],"to":[81],"full":[82],"implementation":[86],"revealed.":[88]},"counts_by_year":[{"year":2019,"cited_by_count":1}],"updated_date":"2026-02-25T21:11:00.739837","created_date":"2025-10-10T00:00:00"}
