{"id":"https://openalex.org/W3174369108","doi":"https://doi.org/10.1145/3448016.3452801","title":"Reducing Ambiguity in Json Schema Discovery","display_name":"Reducing Ambiguity in Json Schema Discovery","publication_year":2021,"publication_date":"2021-06-09","ids":{"openalex":"https://openalex.org/W3174369108","doi":"https://doi.org/10.1145/3448016.3452801","mag":"3174369108"},"language":"en","primary_location":{"id":"doi:10.1145/3448016.3452801","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3448016.3452801","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3448016.3452801","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3448016.3452801","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023353797","display_name":"William Spoth","orcid":null},"institutions":[{"id":"https://openalex.org/I63190737","display_name":"University at Buffalo, State University of New York","ror":"https://ror.org/01y64my43","country_code":"US","type":"education","lineage":["https://openalex.org/I63190737"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"William Spoth","raw_affiliation_strings":["University at Buffalo, SUNY, Buffalo, NY, USA"],"affiliations":[{"raw_affiliation_string":"University at Buffalo, SUNY, Buffalo, NY, USA","institution_ids":["https://openalex.org/I63190737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071259962","display_name":"Oliver Kennedy","orcid":"https://orcid.org/0000-0003-0632-1668"},"institutions":[{"id":"https://openalex.org/I63190737","display_name":"University at Buffalo, State University of New York","ror":"https://ror.org/01y64my43","country_code":"US","type":"education","lineage":["https://openalex.org/I63190737"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Oliver Kennedy","raw_affiliation_strings":["University at Buffalo, SUNY, Buffalo, NY, USA"],"affiliations":[{"raw_affiliation_string":"University at Buffalo, SUNY, Buffalo, NY, USA","institution_ids":["https://openalex.org/I63190737"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057666375","display_name":"Ying L\u00fc","orcid":"https://orcid.org/0000-0002-7698-8962"},"institutions":[{"id":"https://openalex.org/I1342911587","display_name":"Oracle (United States)","ror":"https://ror.org/006c77m33","country_code":"US","type":"company","lineage":["https://openalex.org/I1342911587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ying Lu","raw_affiliation_strings":["Oracle, Redwood City, CA, USA"],"affiliations":[{"raw_affiliation_string":"Oracle, Redwood City, CA, USA","institution_ids":["https://openalex.org/I1342911587"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041506788","display_name":"Beda Christoph Hammerschmidt","orcid":null},"institutions":[{"id":"https://openalex.org/I1342911587","display_name":"Oracle (United States)","ror":"https://ror.org/006c77m33","country_code":"US","type":"company","lineage":["https://openalex.org/I1342911587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Beda Hammerschmidt","raw_affiliation_strings":["Oracle, Redwood City, CA, USA"],"affiliations":[{"raw_affiliation_string":"Oracle, Redwood City, CA, USA","institution_ids":["https://openalex.org/I1342911587"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111489764","display_name":"Zhen Hua Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I1342911587","display_name":"Oracle (United States)","ror":"https://ror.org/006c77m33","country_code":"US","type":"company","lineage":["https://openalex.org/I1342911587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhen Hua Liu","raw_affiliation_strings":["Oracle, Redwood City, CA, USA"],"affiliations":[{"raw_affiliation_string":"Oracle, Redwood City, CA, USA","institution_ids":["https://openalex.org/I1342911587"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5023353797"],"corresponding_institution_ids":["https://openalex.org/I63190737"],"apc_list":null,"apc_paid":null,"fwci":2.6912,"has_fulltext":true,"cited_by_count":16,"citation_normalized_percentile":{"value":0.89883009,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1732","last_page":"1744"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.801501989364624},{"id":"https://openalex.org/keywords/json","display_name":"JSON","score":0.673181414604187},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.635183572769165},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.5607433915138245},{"id":"https://openalex.org/keywords/star-schema","display_name":"Star schema","score":0.5512588024139404},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.540080189704895},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4600306749343872},{"id":"https://openalex.org/keywords/document-structure-description","display_name":"Document Structure Description","score":0.4107087254524231},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.39385485649108887},{"id":"https://openalex.org/keywords/database-schema","display_name":"Database schema","score":0.30742135643959045},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.272991806268692},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.18307170271873474},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.1472753882408142}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.801501989364624},{"id":"https://openalex.org/C2780416260","wikidata":"https://www.wikidata.org/wiki/Q2063","display_name":"JSON","level":2,"score":0.673181414604187},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.635183572769165},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.5607433915138245},{"id":"https://openalex.org/C190703929","wikidata":"https://www.wikidata.org/wiki/Q1331138","display_name":"Star schema","level":4,"score":0.5512588024139404},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.540080189704895},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4600306749343872},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.4107087254524231},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.39385485649108887},{"id":"https://openalex.org/C30775581","wikidata":"https://www.wikidata.org/wiki/Q632285","display_name":"Database schema","level":3,"score":0.30742135643959045},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.272991806268692},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.18307170271873474},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.1472753882408142},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C148840519","wikidata":"https://www.wikidata.org/wiki/Q1049878","display_name":"Database design","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3448016.3452801","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3448016.3452801","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3448016.3452801","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3448016.3452801","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3448016.3452801","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3448016.3452801","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6700000166893005,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1263082790","display_name":null,"funder_award_id":"Oracle University Relations","funder_id":"https://openalex.org/F4320307904","funder_display_name":"Oracle"},{"id":"https://openalex.org/G8266101516","display_name":null,"funder_award_id":"IIS-1750460","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8697568595","display_name":"CAREER: Declarative Uncertainty","funder_award_id":"1750460","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320307904","display_name":"Oracle","ror":"https://ror.org/006c77m33"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3174369108.pdf","grobid_xml":"https://content.openalex.org/works/W3174369108.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W1979161933","https://openalex.org/W2004281168","https://openalex.org/W2035786766","https://openalex.org/W2048687871","https://openalex.org/W2091009885","https://openalex.org/W2105883975","https://openalex.org/W2108350934","https://openalex.org/W2109024523","https://openalex.org/W2123229843","https://openalex.org/W2133640934","https://openalex.org/W2134356404","https://openalex.org/W2137692226","https://openalex.org/W2140609014","https://openalex.org/W2153279985","https://openalex.org/W2154475850","https://openalex.org/W2169131377","https://openalex.org/W2265048516","https://openalex.org/W2434390175","https://openalex.org/W2438877200","https://openalex.org/W2479254933","https://openalex.org/W2604790380","https://openalex.org/W2751338424","https://openalex.org/W2806779957","https://openalex.org/W2807795190","https://openalex.org/W2887418847","https://openalex.org/W2907544011","https://openalex.org/W3013746441","https://openalex.org/W6678385900","https://openalex.org/W6684695314","https://openalex.org/W6717843792","https://openalex.org/W6736469572"],"related_works":["https://openalex.org/W2034415381","https://openalex.org/W2087376388","https://openalex.org/W1561927997","https://openalex.org/W2103472145","https://openalex.org/W2092058806","https://openalex.org/W2042840812","https://openalex.org/W4238907651","https://openalex.org/W1992271858","https://openalex.org/W2389917040","https://openalex.org/W1545286006"],"abstract_inverted_index":{"Ad-hoc":[0],"data":[1,12],"models":[2],"like":[3],"Json":[4,26,91],"simplify":[5],"schema":[6,39,46,51,60,92,113],"evolution":[7],"and":[8,30],"enable":[9],"multiplexing":[10],"various":[11],"sources":[13],"into":[14],"a":[15,90],"single":[16],"stream.":[17],"While":[18],"useful":[19],"when":[20],"writing":[21],"data,":[22],"this":[23,85],"flexibility":[24],"makes":[25],"harder":[27],"to":[28,35,54],"validate":[29],"query,":[31],"forcing":[32],"such":[33],"tasks":[34],"rely":[36],"on":[37],"automated":[38],"discovery":[40,52,93],"techniques.":[41],"Unfortunately,":[42],"ambiguity":[43],"in":[44],"the":[45,71,111],"design":[47],"space":[48],"forces":[49],"existing":[50],"systems":[53],"make":[55],"simplifying,":[56],"data-independent":[57],"assumptions":[58,64],"about":[59],"structure.":[61],"When":[62],"these":[63],"are":[65,74],"violated,":[66],"most":[67],"notably":[68],"by":[69],"APIs,":[70],"generated":[72],"schemas":[73],"imprecise,":[75],"creating":[76],"numerous":[77],"opportunities":[78],"for":[79],"false":[80],"positives":[81],"during":[82],"validation.":[83],"In":[84],"paper,":[86],"we":[87,115],"propose":[88],"Jxplain,":[89],"algorithm":[94],"with":[95],"heuristics":[96],"that":[97,117],"mitigate":[98],"common":[99],"forms":[100],"of":[101,110],"ambiguity.":[102],"Although":[103],"Jxplain":[104],"is":[105],"slightly":[106],"slower":[107],"than":[108],"state":[109],"art":[112],"extractors,":[114],"show":[116],"it":[118],"produces":[119],"significantly":[120],"more":[121],"precise":[122],"schemas.":[123]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
