{"id":"https://openalex.org/W6963227209","doi":"https://doi.org/10.18420/inf2022_85","title":"Approaches for Automated Data Quality Analysis: Syntactic and Semantic Assessment","display_name":"Approaches for Automated Data Quality Analysis: Syntactic and Semantic Assessment","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W6963227209","doi":"https://doi.org/10.18420/inf2022_85"},"language":"en","primary_location":{"id":"doi:10.18420/inf2022_85","is_oa":true,"landing_page_url":"https://doi.org/10.18420/inf2022_85","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.18420/inf2022_85","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Ahiagble, Agbodzea Pascal","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ahiagble, Agbodzea Pascal","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Stein, Hannah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stein, Hannah","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1637,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.57628371,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.0007999999797903001,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14201","display_name":"Data Analysis and Archiving","score":0.00019999999494757503,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.5995000004768372},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5105000138282776},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.43880000710487366},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4343999922275543},{"id":"https://openalex.org/keywords/data-validation","display_name":"Data validation","score":0.3783000111579895},{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.36010000109672546},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.34299999475479126},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.33719998598098755}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8321999907493591},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.5995000004768372},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5131999850273132},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5105000138282776},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.43880000710487366},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4343999922275543},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.38449999690055847},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3837999999523163},{"id":"https://openalex.org/C92446256","wikidata":"https://www.wikidata.org/wiki/Q3306762","display_name":"Data validation","level":2,"score":0.3783000111579895},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3668999969959259},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.36010000109672546},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.34299999475479126},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.33719998598098755},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.3280999958515167},{"id":"https://openalex.org/C2777946921","wikidata":"https://www.wikidata.org/wiki/Q7449044","display_name":"Semantic analysis (machine learning)","level":2,"score":0.3158999979496002},{"id":"https://openalex.org/C3020001037","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assessment","level":3,"score":0.3052000105381012},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.2784999907016754},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.26579999923706055},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.25679999589920044},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.25049999356269836},{"id":"https://openalex.org/C37926939","wikidata":"https://www.wikidata.org/wiki/Q7449061","display_name":"Semantic equivalence","level":4,"score":0.25}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18420/inf2022_85","is_oa":true,"landing_page_url":"https://doi.org/10.18420/inf2022_85","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.18420/inf2022_85","is_oa":true,"landing_page_url":"https://doi.org/10.18420/inf2022_85","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Data":[0],"quality":[1,19,72,135,151,169],"significantly":[2],"influences":[3],"data":[4,12,18,23,71,122,134,150,158],"usability":[5],"and":[6,43,59,74,86,101,170],"plays":[7],"an":[8,87,109],"important":[9],"role":[10],"in":[11,56],"trading.":[13],"This":[14,153],"paper":[15],"presents":[16],"a":[17,65,83,94],"analysis":[20,147],"(DQA)":[21],"of":[22,37,148,157,165,175],"tables":[24,123],"on":[25,64,81],"two":[26],"levels.":[27],"The":[28,116],"first,":[29],"the":[30,35,38,41,44,46,51,54,57,60,68,106,138,145,155,163,173],"so-called":[31,47],"syntactic":[32],"level,":[33,49],"concerns":[34,50],"structure":[36],"elements":[39,55],"within":[40],"database":[42,58],"second,":[45],"semantic":[48,111,131],"relationship":[52],"between":[53],"\"real":[61],"world\".":[62],"Based":[63],"literature":[66],"review":[67],"most":[69],"relevant":[70,128],"criteria":[73],"corresponding":[75],"metrics":[76],"were":[77],"derived.":[78],"Subsequently,":[79],"based":[80],"heuristics,":[82],"data-centric":[84],"approach":[85,117],"unsupervised":[88],"machine":[89],"learning":[90],"clustering":[91],"algorithm":[92],"DBSCAN,":[93],"service":[95,113],"for":[96,124,126,167],"automated":[97,110,146],"DQA,":[98],"is":[99,118,142],"designed":[100],"implemented":[102],"(syntactic":[103],"DQA).":[104],"In":[105],"next":[107],"step,":[108],"DQA":[112],"as":[114,160,162],"well.":[115],"used":[119],"to":[120],"examine":[121],"example":[125],"missing":[127],"columns":[129],"(i.e.,":[130],"completeness).":[132],"A":[133],"index":[136],"represents":[137],"services\u2019":[139],"output,":[140],"which":[141],"derived":[143],"from":[144],"various":[149],"criteria.":[152],"enables":[154],"assessment":[156],"quality,":[159],"well":[161],"detection":[164],"potentials":[166],"improving":[168],"thus":[171],"increasing":[172],"value":[174],"tradeable":[176],"data.":[177]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
