{"id":"https://openalex.org/W4411033091","doi":"https://doi.org/10.1145/3735511","title":"Graph Metrics-driven Record Cluster Repair meets LLM-based active learning","display_name":"Graph Metrics-driven Record Cluster Repair meets LLM-based active learning","publication_year":2025,"publication_date":"2025-06-04","ids":{"openalex":"https://openalex.org/W4411033091","doi":"https://doi.org/10.1145/3735511"},"language":"en","primary_location":{"id":"doi:10.1145/3735511","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3735511","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3735511","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073205641","display_name":"Victor Christen","orcid":"https://orcid.org/0000-0001-7175-7359"},"institutions":[{"id":"https://openalex.org/I147765834","display_name":"Leipzig University of Applied Sciences","ror":"https://ror.org/03xgcq477","country_code":"DE","type":"education","lineage":["https://openalex.org/I147765834"]},{"id":"https://openalex.org/I926574661","display_name":"Leipzig University","ror":"https://ror.org/03s7gtk40","country_code":"DE","type":"education","lineage":["https://openalex.org/I926574661"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Victor Christen","raw_affiliation_strings":["Faculty of Mathematics and Computer Science, Leipzig University","Faculty of Mathematics and Computer Science, Leipzig University, Leipzig, Germany"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics and Computer Science, Leipzig University","institution_ids":["https://openalex.org/I147765834","https://openalex.org/I926574661"]},{"raw_affiliation_string":"Faculty of Mathematics and Computer Science, Leipzig University, Leipzig, Germany","institution_ids":["https://openalex.org/I926574661"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076040503","display_name":"Daniel Obraczka","orcid":"https://orcid.org/0000-0002-0366-9872"},"institutions":[{"id":"https://openalex.org/I147765834","display_name":"Leipzig University of Applied Sciences","ror":"https://ror.org/03xgcq477","country_code":"DE","type":"education","lineage":["https://openalex.org/I147765834"]},{"id":"https://openalex.org/I926574661","display_name":"Leipzig University","ror":"https://ror.org/03s7gtk40","country_code":"DE","type":"education","lineage":["https://openalex.org/I926574661"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Daniel Obraczka","raw_affiliation_strings":["Faculty of Mathematics and Computer Science, Leipzig University","Faculty of Mathematics and Computer Science, Leipzig University, Leipzig, Germany"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics and Computer Science, Leipzig University","institution_ids":["https://openalex.org/I147765834","https://openalex.org/I926574661"]},{"raw_affiliation_string":"Faculty of Mathematics and Computer Science, Leipzig University, Leipzig, Germany","institution_ids":["https://openalex.org/I926574661"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039821674","display_name":"Marvin Hofer","orcid":"https://orcid.org/0000-0003-4667-5743"},"institutions":[{"id":"https://openalex.org/I147765834","display_name":"Leipzig University of Applied Sciences","ror":"https://ror.org/03xgcq477","country_code":"DE","type":"education","lineage":["https://openalex.org/I147765834"]},{"id":"https://openalex.org/I926574661","display_name":"Leipzig University","ror":"https://ror.org/03s7gtk40","country_code":"DE","type":"education","lineage":["https://openalex.org/I926574661"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Marvin Hofer","raw_affiliation_strings":["Faculty of Mathematics and Computer Science, Leipzig University","Faculty of Mathematics and Computer Science, Leipzig University, Leipzig, Germany"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics and Computer Science, Leipzig University","institution_ids":["https://openalex.org/I147765834","https://openalex.org/I926574661"]},{"raw_affiliation_string":"Faculty of Mathematics and Computer Science, Leipzig University, Leipzig, Germany","institution_ids":["https://openalex.org/I926574661"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057385787","display_name":"Martin Franke","orcid":"https://orcid.org/0000-0003-4157-8637"},"institutions":[{"id":"https://openalex.org/I926574661","display_name":"Leipzig University","ror":"https://ror.org/03s7gtk40","country_code":"DE","type":"education","lineage":["https://openalex.org/I926574661"]},{"id":"https://openalex.org/I147765834","display_name":"Leipzig University of Applied Sciences","ror":"https://ror.org/03xgcq477","country_code":"DE","type":"education","lineage":["https://openalex.org/I147765834"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Martin Franke","raw_affiliation_strings":["Faculty of Mathematics and Computer Science, Leipzig University","Faculty of Mathematics and Computer Science, Leipzig University, Leipzig, Germany"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics and Computer Science, Leipzig University","institution_ids":["https://openalex.org/I147765834","https://openalex.org/I926574661"]},{"raw_affiliation_string":"Faculty of Mathematics and Computer Science, Leipzig University, Leipzig, Germany","institution_ids":["https://openalex.org/I926574661"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075756237","display_name":"Erhard Rahm","orcid":"https://orcid.org/0000-0002-2665-1114"},"institutions":[{"id":"https://openalex.org/I147765834","display_name":"Leipzig University of Applied Sciences","ror":"https://ror.org/03xgcq477","country_code":"DE","type":"education","lineage":["https://openalex.org/I147765834"]},{"id":"https://openalex.org/I926574661","display_name":"Leipzig University","ror":"https://ror.org/03s7gtk40","country_code":"DE","type":"education","lineage":["https://openalex.org/I926574661"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Erhard Rahm","raw_affiliation_strings":["Faculty of Mathematics and Computer Science, Leipzig University","Faculty of Mathematics and Computer Science, Leipzig University, Leipzig, Germany"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics and Computer Science, Leipzig University","institution_ids":["https://openalex.org/I147765834","https://openalex.org/I926574661"]},{"raw_affiliation_string":"Faculty of Mathematics and Computer Science, Leipzig University, Leipzig, Germany","institution_ids":["https://openalex.org/I926574661"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5073205641"],"corresponding_institution_ids":["https://openalex.org/I147765834","https://openalex.org/I926574661"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16888647,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"17","issue":"2","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9771000146865845,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9768999814987183,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8253902792930603},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5552597641944885},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.499539852142334},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.34523826837539673},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16086718440055847}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8253902792930603},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5552597641944885},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.499539852142334},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.34523826837539673},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16086718440055847}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3735511","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3735511","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3735511","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3735511","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1568705965","https://openalex.org/W1921100529","https://openalex.org/W1967570846","https://openalex.org/W1971937094","https://openalex.org/W1997927541","https://openalex.org/W2031250218","https://openalex.org/W2052698082","https://openalex.org/W2066636486","https://openalex.org/W2118966355","https://openalex.org/W2143893259","https://openalex.org/W2148524305","https://openalex.org/W2170039925","https://openalex.org/W2175093940","https://openalex.org/W2239873446","https://openalex.org/W2292361954","https://openalex.org/W2295240344","https://openalex.org/W2569876941","https://openalex.org/W2744742467","https://openalex.org/W2747329762","https://openalex.org/W2798649495","https://openalex.org/W2805602976","https://openalex.org/W2892018760","https://openalex.org/W2945827377","https://openalex.org/W2994940956","https://openalex.org/W2997655715","https://openalex.org/W3014074327","https://openalex.org/W3014705052","https://openalex.org/W3017317785","https://openalex.org/W3032005113","https://openalex.org/W3045211065","https://openalex.org/W3092962901","https://openalex.org/W3101553402","https://openalex.org/W3123375411","https://openalex.org/W3139815093","https://openalex.org/W3154675738","https://openalex.org/W3156669901","https://openalex.org/W3177179246","https://openalex.org/W3202190154","https://openalex.org/W3209968928","https://openalex.org/W4242744113","https://openalex.org/W4293651041","https://openalex.org/W4301186107","https://openalex.org/W4321448364","https://openalex.org/W4386128198","https://openalex.org/W4386298264","https://openalex.org/W4389523900"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Entity":[0],"resolution":[1,12],"plays":[2],"an":[3,200,215],"important":[4],"role":[5],"in":[6,116],"data":[7,37,77,97,101,138],"integration.":[8],"However,":[9,99],"most":[10,91],"entity":[11],"methods":[13,57,115,121,229],"focus":[14],"on":[15,28,127,151],"pairwise":[16],"linkage":[17],"and":[18,63,154,185,230,237],"ignore":[19],"potential":[20],"errors":[21,62],"generated":[22],"by":[23],"the":[24,29,52,67,72,76,95,143,146,152,173,186,221],"transitive":[25,40],"closure":[26,41],"based":[27,126],"determined":[30],"equality":[31],"links":[32,184],"between":[33],"two":[34],"or":[35,122],"more":[36,232],"sources.":[38],"The":[39,217],"of":[42,70,145,183,188],"a":[43,46,128,161,180],"record":[44,50,93],"forms":[45],"cluster":[47,87,165,227],"where":[48],"each":[49,86],"represents":[51],"same":[53,96],"entity.":[54],"Cluster":[55],"repair":[56,166,228],"aim":[58],"to":[59,108,137,194,205],"determine":[60],"these":[61],"correct":[64],"them.":[65],"In":[66,156,192],"first":[68],"category":[69],"methods,":[71],"assumption":[73,106],"is":[74,231],"that":[75,167,220],"sources":[78,139],"themselves":[79],"do":[80],"not":[81],"contain":[82,89],"any":[83],"duplicates.":[84,141],"Consequently,":[85],"can":[88,134],"at":[90],"one":[92],"from":[94,104,172],"source.":[98],"real-world":[100],"often":[102],"deviates":[103],"this":[105,157],"due":[107],"quality":[109,144],"issues.":[110],"Recent":[111],"approaches":[112],"apply":[113],"clustering":[114,124],"combination":[117],"with":[118,140],"link":[119],"categorization":[120],"graph":[123,130,169,195,222],"algorithms":[125],"single":[129],"metric":[131],"so":[132],"they":[133],"be":[135],"applied":[136],"Nevertheless,":[142],"results":[147],"highly":[148],"varies":[149],"depending":[150],"configuration":[153],"dataset.":[155],"study,":[158],"we":[159,198,209],"introduce":[160],"novel":[162],"approach":[163],"for":[164],"utilizes":[168],"metrics":[170,178],"derived":[171],"underlying":[174],"similarity":[175],"graphs.":[176],"These":[177],"enable":[179],"comprehensive":[181],"characterization":[182],"generation":[187],"enhanced":[189],"classification":[190],"models.":[191],"addition":[193],"metric-based":[196,223],"models,":[197],"integrate":[199,210],"active":[201],"learning":[202],"mechanism":[203],"tailored":[204],"cluster-specific":[206],"attributes.":[207],"Moreover,":[208],"large":[211],"language":[212],"models":[213],"as":[214],"oracle.":[216],"evaluation":[218],"shows":[219],"method":[224],"outperforms":[225],"existing":[226],"robust":[233],"regarding":[234],"different":[235],"datasets":[236],"configurations.":[238]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
