{"id":"https://openalex.org/W4236700347","doi":"https://doi.org/10.1504/ijict.2021.116556","title":"Research on fast de-duplication of text backup information in library database based on big data","display_name":"Research on fast de-duplication of text backup information in library database based on big data","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W4236700347","doi":"https://doi.org/10.1504/ijict.2021.116556"},"language":"en","primary_location":{"id":"doi:10.1504/ijict.2021.116556","is_oa":false,"landing_page_url":"https://doi.org/10.1504/ijict.2021.116556","pdf_url":null,"source":{"id":"https://openalex.org/S168803321","display_name":"International Journal of Information and Communication Technology","issn_l":"1466-6642","issn":["1466-6642","1741-8070"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310317825","host_organization_name":"Inderscience Publishers","host_organization_lineage":["https://openalex.org/P4310317825"],"host_organization_lineage_names":["Inderscience Publishers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Information and Communication Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073461448","display_name":"Ling Ji","orcid":"https://orcid.org/0000-0003-1140-3174"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ling Ji","raw_affiliation_strings":["Information Technology College, Nantong Normal College, Nantong 226010, China"],"affiliations":[{"raw_affiliation_string":"Information Technology College, Nantong Normal College, Nantong 226010, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5073461448"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2754,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.67443028,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"19","issue":"1","first_page":"76","last_page":"76"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13647","display_name":"AI and Big Data Applications","score":0.809499979019165,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13647","display_name":"AI and Big Data Applications","score":0.809499979019165,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13027","display_name":"Applied Advanced Technologies","score":0.761900007724762,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14155","display_name":"Advanced Technology in Applications","score":0.7396000027656555,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/backup","display_name":"Backup","score":0.9369112253189087},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8343300223350525},{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.6188123822212219},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.5245085954666138},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4903569221496582},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.42819860577583313},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.35901930928230286},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.08457410335540771}],"concepts":[{"id":"https://openalex.org/C2780945871","wikidata":"https://www.wikidata.org/wiki/Q194274","display_name":"Backup","level":2,"score":0.9369112253189087},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8343300223350525},{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.6188123822212219},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.5245085954666138},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4903569221496582},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.42819860577583313},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.35901930928230286},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.08457410335540771},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1504/ijict.2021.116556","is_oa":false,"landing_page_url":"https://doi.org/10.1504/ijict.2021.116556","pdf_url":null,"source":{"id":"https://openalex.org/S168803321","display_name":"International Journal of Information and Communication Technology","issn_l":"1466-6642","issn":["1466-6642","1741-8070"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310317825","host_organization_name":"Inderscience Publishers","host_organization_lineage":["https://openalex.org/P4310317825"],"host_organization_lineage_names":["Inderscience Publishers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Information and Communication Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/1","display_name":"No poverty","score":0.6499999761581421}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1437397736","https://openalex.org/W4285245558","https://openalex.org/W2048606985","https://openalex.org/W2375056709","https://openalex.org/W4301155776","https://openalex.org/W2012359782","https://openalex.org/W2952411620","https://openalex.org/W2348132657","https://openalex.org/W1481447012","https://openalex.org/W4236228546"],"abstract_inverted_index":{"In":[0],"order":[1],"to":[2,56],"overcome":[3],"the":[4,50,58,62,67,70,75,79,87,91,94,97,104,110,118,126,135],"problems":[5],"of":[6,12,24,42,61,81,121],"poor":[7],"effect":[8,138],"and":[9,86,100,134],"low":[10],"efficiency":[11,129],"traditional":[13],"information":[14,27,44,85],"de-duplication":[15,22,102],"methods,":[16],"this":[17,36,122],"paper":[18,37],"proposes":[19],"a":[20],"fast":[21],"method":[23,123],"text":[25,43,82],"backup":[26,84],"in":[28,46],"library":[29,47],"database":[30],"based":[31],"on":[32],"big":[33],"data.":[34],"Firstly,":[35],"carries":[38],"out":[39],"parallel":[40],"mining":[41],"features":[45,51],"database,":[48],"uses":[49],"with":[52,69],"strong":[53],"classification":[54],"ability":[55],"determine":[57],"parameter":[59,71],"value":[60,72],"repeated":[63,83],"feature":[64],"function,":[65],"obtains":[66],"entries":[68],"higher":[73],"than":[74,109],"threshold":[76,111],"value,":[77],"determines":[78],"number":[80],"group":[88],"weight,":[89],"sets":[90],"difference":[92],"between":[93],"two":[95],"as":[96],"remaining":[98,105],"digits,":[99],"stops":[101],"when":[103],"digits":[106],"are":[107],"lower":[108],"value.":[112],"The":[113],"experimental":[114],"results":[115],"show":[116],"that":[117],"average":[119],"accuracy":[120],"is":[124,130,139],"96.95%,":[125],"weight":[127,136],"removal":[128,137],"always":[131],"above":[132],"98%,":[133],"good.":[140]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
