{"id":"https://openalex.org/W7162398679","doi":"https://doi.org/10.1145/3788853.3801599","title":"SAGE: Explainable Semantic-Aware Graph-Based Entity Integration for Heterogeneous Data Sources","display_name":"SAGE: Explainable Semantic-Aware Graph-Based Entity Integration for Heterogeneous Data Sources","publication_year":2026,"publication_date":"2026-05-26","ids":{"openalex":"https://openalex.org/W7162398679","doi":"https://doi.org/10.1145/3788853.3801599"},"language":null,"primary_location":{"id":"doi:10.1145/3788853.3801599","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3788853.3801599","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3788853.3801599","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136993541","display_name":"Jaya Prakash Arjarapu","orcid":"https://orcid.org/0009-0006-4282-1191"},"institutions":[{"id":"https://openalex.org/I121750182","display_name":"National Institute of Technology Warangal","ror":"https://ror.org/017ebfz38","country_code":"IN","type":"education","lineage":["https://openalex.org/I121750182"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Jaya Prakash Arjarapu","raw_affiliation_strings":["National Institute of Technology Warangal, Warangal, Telangana, India"],"raw_orcid":"https://orcid.org/0009-0006-4282-1191","affiliations":[{"raw_affiliation_string":"National Institute of Technology Warangal, Warangal, Telangana, India","institution_ids":["https://openalex.org/I121750182"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137084859","display_name":"Sai Chinmai Kalakota","orcid":"https://orcid.org/0009-0009-3785-9939"},"institutions":[{"id":"https://openalex.org/I121750182","display_name":"National Institute of Technology Warangal","ror":"https://ror.org/017ebfz38","country_code":"IN","type":"education","lineage":["https://openalex.org/I121750182"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sai Chinmai Kalakota","raw_affiliation_strings":["National Institute of Technology Warangal, Warangal, Telangana, India"],"raw_orcid":"https://orcid.org/0009-0009-3785-9939","affiliations":[{"raw_affiliation_string":"National Institute of Technology Warangal, Warangal, Telangana, India","institution_ids":["https://openalex.org/I121750182"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101815521","display_name":"Radha Krishna Pisipati","orcid":"https://orcid.org/0000-0001-8298-7571"},"institutions":[{"id":"https://openalex.org/I121750182","display_name":"National Institute of Technology Warangal","ror":"https://ror.org/017ebfz38","country_code":"IN","type":"education","lineage":["https://openalex.org/I121750182"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Radha Krishna Pisipati","raw_affiliation_strings":["National Institute of Technology Warangal, Warangal, Telangana, India"],"raw_orcid":"https://orcid.org/0000-0001-8298-7571","affiliations":[{"raw_affiliation_string":"National Institute of Technology Warangal, Warangal, Telangana, India","institution_ids":["https://openalex.org/I121750182"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.84146608,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"10","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.52920001745224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.52920001745224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.14010000228881836,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.06629999727010727,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.5584999918937683},{"id":"https://openalex.org/keywords/data-source","display_name":"Data source","score":0.30869999527931213},{"id":"https://openalex.org/keywords/information-integration","display_name":"Information integration","score":0.271699994802475},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.26429998874664307}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6399000287055969},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.5584999918937683},{"id":"https://openalex.org/C2983685735","wikidata":"https://www.wikidata.org/wiki/Q5227355","display_name":"Data source","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C33326189","wikidata":"https://www.wikidata.org/wiki/Q17092450","display_name":"Information integration","level":2,"score":0.271699994802475},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.263700008392334},{"id":"https://openalex.org/C19527686","wikidata":"https://www.wikidata.org/wiki/Q1665453","display_name":"System integration","level":2,"score":0.2574000060558319},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2515000104904175},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.24789999425411224},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.24650000035762787}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3788853.3801599","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3788853.3801599","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the International Conference on Management of Data","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3788853.3801599","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3788853.3801599","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the International Conference on Management of Data","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W2406114359","https://openalex.org/W2798649495","https://openalex.org/W3092962901","https://openalex.org/W4385270687"],"related_works":[],"abstract_inverted_index":{"Integrating":[0],"heterogeneous":[1,57],"data":[2,19,141],"sources":[3,58,142],"such":[4],"as":[5,104],"CSV":[6],"files,":[7],"JSON":[8],"APIs,":[9],"PDFs,":[10],"and":[11,26,85,113,135],"relational":[12],"databases":[13],"remains":[14],"a":[15,64,70,105],"core":[16],"challenge":[17],"in":[18,101],"management.":[20],"Existing":[21],"schema":[22],"matching,":[23],"entity":[24,60],"resolution,":[25],"recent":[27],"LLM-based":[28],"approaches":[29],"either":[30],"aggressively":[31],"merge":[32],"entities":[33],"leading":[34],"to":[35,115],"semantic":[36,48,88,119],"errors":[37],"or":[38],"hallucinate":[39],"joins":[40],"without":[41,59],"validation.":[42],"We":[43],"demonstrate":[44],"SAGE,":[45],"an":[46,122],"explainable":[47],"integration":[49],"system":[50],"that":[51,67],"answers":[52],"natural":[53],"language":[54],"queries":[55,139],"over":[56],"merging.":[61],"SAGE":[62,99,128],"introduces":[63],"neuro-symbolic":[65],"architecture":[66],"combines":[68],"(i)":[69],"no-merge":[71],"knowledge":[72],"graph,":[73],"(ii)":[74],"LLM-driven":[75],"concept":[76],"classification,":[77],"(iii)":[78],"validated":[79,112],"virtual":[80],"links":[81],"for":[82],"cross-source":[83],"joins,":[84,131],"(iv)":[86],"graph-refined":[87],"fingerprints":[89],"using":[90],"Graph":[91],"Attention":[92],"Networks":[93],"(GATs).":[94],"The":[95],"key":[96],"novelty":[97],"of":[98],"lies":[100],"treating":[102],"explainability":[103],"first-class":[106],"concern:":[107],"every":[108],"join":[109],"is":[110],"explicitly":[111],"exposed":[114],"the":[116],"user":[117],"with":[118],"evidence.":[120],"Through":[121],"interactive":[123],"demonstration,":[124],"attendees":[125],"explore":[126],"how":[127],"discovers":[129],"hidden":[130],"prunes":[132],"incorrect":[133],"ones,":[134],"executes":[136],"transparent,":[137],"schema-free":[138],"across":[140]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-27T00:00:00"}
