{"id":"https://openalex.org/W2000385060","doi":"https://doi.org/10.1109/spac.2014.6982731","title":"Semantic-based intelligent data clean framework for big data","display_name":"Semantic-based intelligent data clean framework for big data","publication_year":2014,"publication_date":"2014-10-01","ids":{"openalex":"https://openalex.org/W2000385060","doi":"https://doi.org/10.1109/spac.2014.6982731","mag":"2000385060"},"language":"en","primary_location":{"id":"doi:10.1109/spac.2014.6982731","is_oa":false,"landing_page_url":"https://doi.org/10.1109/spac.2014.6982731","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings 2014 IEEE International Conference on Security, Pattern Analysis, and Cybernetics (SPAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100690215","display_name":"Jia Wang","orcid":"https://orcid.org/0000-0002-1453-2636"},"institutions":[{"id":"https://openalex.org/I2800372957","display_name":"China Electronics Technology Group Corporation","ror":"https://ror.org/0098hst83","country_code":"CN","type":"company","lineage":["https://openalex.org/I2800372957"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jia Wang","raw_affiliation_strings":["The 28th Research Institute of China Electronics Technology Group Corporation, Nanjing, China","The 28th Research Institute of China Electronics Technology Group Corporation,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"The 28th Research Institute of China Electronics Technology Group Corporation, Nanjing, China","institution_ids":["https://openalex.org/I2800372957"]},{"raw_affiliation_string":"The 28th Research Institute of China Electronics Technology Group Corporation,Nanjing,China","institution_ids":["https://openalex.org/I2800372957"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100868690","display_name":"Zhijun Song","orcid":"https://orcid.org/0000-0001-9587-4824"},"institutions":[{"id":"https://openalex.org/I2800372957","display_name":"China Electronics Technology Group Corporation","ror":"https://ror.org/0098hst83","country_code":"CN","type":"company","lineage":["https://openalex.org/I2800372957"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhijun Song","raw_affiliation_strings":["The 28th Research Institute of China Electronics Technology Group Corporation, Nanjing, China","The 28th Research Institute of China Electronics Technology Group Corporation,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"The 28th Research Institute of China Electronics Technology Group Corporation, Nanjing, China","institution_ids":["https://openalex.org/I2800372957"]},{"raw_affiliation_string":"The 28th Research Institute of China Electronics Technology Group Corporation,Nanjing,China","institution_ids":["https://openalex.org/I2800372957"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100340683","display_name":"Qian Li","orcid":"https://orcid.org/0000-0003-4365-1359"},"institutions":[{"id":"https://openalex.org/I2800372957","display_name":"China Electronics Technology Group Corporation","ror":"https://ror.org/0098hst83","country_code":"CN","type":"company","lineage":["https://openalex.org/I2800372957"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qian Li","raw_affiliation_strings":["The 28th Research Institute of China Electronics Technology Group Corporation, Nanjing, China","The 28th Research Institute of China Electronics Technology Group Corporation,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"The 28th Research Institute of China Electronics Technology Group Corporation, Nanjing, China","institution_ids":["https://openalex.org/I2800372957"]},{"raw_affiliation_string":"The 28th Research Institute of China Electronics Technology Group Corporation,Nanjing,China","institution_ids":["https://openalex.org/I2800372957"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050817770","display_name":"Jun Yu","orcid":"https://orcid.org/0000-0003-1922-7283"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yu","raw_affiliation_strings":["School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100405394","display_name":"Fei Chen","orcid":"https://orcid.org/0000-0002-4191-8163"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fei Chen","raw_affiliation_strings":["Nanjing LES Information Technology CO., LTD, Nanjing, China","Nanjing LES Information Technology CO., LTD., Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Nanjing LES Information Technology CO., LTD, Nanjing, China","institution_ids":[]},{"raw_affiliation_string":"Nanjing LES Information Technology CO., LTD., Nanjing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100690215"],"corresponding_institution_ids":["https://openalex.org/I2800372957"],"apc_list":null,"apc_paid":null,"fwci":1.2175,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.81656134,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"38","issue":null,"first_page":"448","last_page":"453"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14280","display_name":"Big Data Technologies and Applications","score":0.9821000099182129,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8891822099685669},{"id":"https://openalex.org/keywords/data-cleansing","display_name":"Data cleansing","score":0.8282831907272339},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.7086772322654724},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5153143405914307},{"id":"https://openalex.org/keywords/semantic-matching","display_name":"Semantic matching","score":0.5150665640830994},{"id":"https://openalex.org/keywords/semantic-computing","display_name":"Semantic computing","score":0.491991251707077},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4629918038845062},{"id":"https://openalex.org/keywords/semantic-data-model","display_name":"Semantic data model","score":0.45687639713287354},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4412161111831665},{"id":"https://openalex.org/keywords/semantic-technology","display_name":"Semantic technology","score":0.4209045469760895},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.3211197853088379},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.11420324444770813}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8891822099685669},{"id":"https://openalex.org/C42199009","wikidata":"https://www.wikidata.org/wiki/Q1172378","display_name":"Data cleansing","level":4,"score":0.8282831907272339},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.7086772322654724},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5153143405914307},{"id":"https://openalex.org/C2778493491","wikidata":"https://www.wikidata.org/wiki/Q7449072","display_name":"Semantic matching","level":3,"score":0.5150665640830994},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.491991251707077},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4629918038845062},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.45687639713287354},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4412161111831665},{"id":"https://openalex.org/C6881194","wikidata":"https://www.wikidata.org/wiki/Q7449091","display_name":"Semantic technology","level":4,"score":0.4209045469760895},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.3211197853088379},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.11420324444770813},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/spac.2014.6982731","is_oa":false,"landing_page_url":"https://doi.org/10.1109/spac.2014.6982731","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings 2014 IEEE International Conference on Security, Pattern Analysis, and Cybernetics (SPAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4699999988079071,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1061391235","https://openalex.org/W1861600621","https://openalex.org/W1966415964","https://openalex.org/W1972084483","https://openalex.org/W1972702299","https://openalex.org/W1974573780","https://openalex.org/W1977814411","https://openalex.org/W1986017822","https://openalex.org/W2006904655","https://openalex.org/W2024770506","https://openalex.org/W2050071106","https://openalex.org/W2097703723","https://openalex.org/W2105610271","https://openalex.org/W2116502002","https://openalex.org/W2122465391","https://openalex.org/W2123432324","https://openalex.org/W2152756885","https://openalex.org/W2166168249","https://openalex.org/W2379386680","https://openalex.org/W3098527277","https://openalex.org/W3110779036","https://openalex.org/W6639142120","https://openalex.org/W6678103630","https://openalex.org/W7075654357"],"related_works":["https://openalex.org/W2280628760","https://openalex.org/W1990650227","https://openalex.org/W4200411507","https://openalex.org/W2779831736","https://openalex.org/W2382028126","https://openalex.org/W2101810124","https://openalex.org/W1937291787","https://openalex.org/W2146671914","https://openalex.org/W2373133917","https://openalex.org/W4389544911"],"abstract_inverted_index":{"In":[0],"order":[1],"to":[2,42],"overcome":[3],"the":[4],"limitation":[5],"of":[6],"existing":[7],"data":[8,31,75],"cleansing":[9],"methods":[10],"working":[11],"on":[12],"massive":[13],"data,":[14],"in":[15],"this":[16,51],"paper,":[17],"we":[18],"propose":[19],"a":[20,70],"generic":[21],"semantic-based":[22],"framework":[23,53],"using":[24],"parallelized":[25,52],"processing":[26],"model":[27],"for":[28,73],"effective":[29],"big":[30,74],"cleansing.":[32,76],"We":[33],"also":[34],"use":[35],"an":[36],"improved":[37,55],"Semantic-Based":[38,56],"Keyword":[39,57],"Matching":[40,58],"Algorithm":[41,59],"deal":[43],"with":[44,54,63],"duplicate":[45],"data.":[46],"Experimental":[47],"results":[48],"show":[49],"that":[50],"can":[60],"identify":[61],"duplicates":[62],"high":[64],"recall":[65],"and":[66,68],"precision":[67],"have":[69],"good":[71],"performance":[72]},"counts_by_year":[{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
