{"id":"https://openalex.org/W2778509778","doi":"https://doi.org/10.1007/978-981-10-7605-3_82","title":"Hadoop Based Parallel Deduplication Method for Web Documents","display_name":"Hadoop Based Parallel Deduplication Method for Web Documents","publication_year":2017,"publication_date":"2017-12-19","ids":{"openalex":"https://openalex.org/W2778509778","doi":"https://doi.org/10.1007/978-981-10-7605-3_82","mag":"2778509778"},"language":"en","primary_location":{"id":"doi:10.1007/978-981-10-7605-3_82","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-981-10-7605-3_82","pdf_url":null,"source":{"id":"https://openalex.org/S4210179954","display_name":"Lecture notes in electrical engineering","issn_l":"1876-1100","issn":["1876-1100","1876-1119"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Electrical Engineering","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101708940","display_name":"Junjie Song","orcid":"https://orcid.org/0000-0002-6486-521X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junjie Song","raw_affiliation_strings":["College of Information, Shanghai Martime University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Information, Shanghai Martime University, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100642607","display_name":"Jin Liu","orcid":"https://orcid.org/0000-0001-7249-698X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin Liu","raw_affiliation_strings":["College of Information, Shanghai Martime University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Information, Shanghai Martime University, Shanghai, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100698166","display_name":"Yuhui Zheng","orcid":"https://orcid.org/0000-0002-4408-3800"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuhui Zheng","raw_affiliation_strings":["School of Computer and Software, Nanjing University of Information Science and Technology, Nanjing, 210044, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Software, Nanjing University of Information Science and Technology, Nanjing, 210044, China","institution_ids":["https://openalex.org/I200845125"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100698166"],"corresponding_institution_ids":["https://openalex.org/I200845125"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23815848,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"499","last_page":"504"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9753999710083008,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7845299243927002},{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.7674065232276917},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.686040461063385},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.622912585735321},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4216160178184509},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4164832830429077},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.41276246309280396},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.36323872208595276},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10007166862487793},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.061595767736434937}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7845299243927002},{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.7674065232276917},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.686040461063385},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.622912585735321},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4216160178184509},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4164832830429077},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.41276246309280396},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.36323872208595276},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10007166862487793},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.061595767736434937}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/978-981-10-7605-3_82","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-981-10-7605-3_82","pdf_url":null,"source":{"id":"https://openalex.org/S4210179954","display_name":"Lecture notes in electrical engineering","issn_l":"1876-1100","issn":["1876-1100","1876-1119"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Electrical Engineering","raw_type":"book-chapter"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1525595230","https://openalex.org/W1854214752","https://openalex.org/W1956559956","https://openalex.org/W1978394996","https://openalex.org/W2057989089","https://openalex.org/W2068632118","https://openalex.org/W2130055503","https://openalex.org/W2152565070","https://openalex.org/W2173213060","https://openalex.org/W2359493093","https://openalex.org/W2369315739","https://openalex.org/W4230872509","https://openalex.org/W4285719527","https://openalex.org/W6639055396"],"related_works":["https://openalex.org/W3144870715","https://openalex.org/W3142319788","https://openalex.org/W2587188779","https://openalex.org/W3132870970","https://openalex.org/W2943088381","https://openalex.org/W4385804830","https://openalex.org/W2144348063","https://openalex.org/W2074021203","https://openalex.org/W4296125805","https://openalex.org/W1982579475"],"abstract_inverted_index":null,"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
