{"id":"https://openalex.org/W4409076718","doi":"https://doi.org/10.1109/access.2025.3556899","title":"Paraphrase Identification With Deep Learning: A Review of Datasets and Methods","display_name":"Paraphrase Identification With Deep Learning: A Review of Datasets and Methods","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4409076718","doi":"https://doi.org/10.1109/access.2025.3556899"},"language":"en","primary_location":{"id":"doi:10.1109/access.2025.3556899","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3556899","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2025.3556899","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100754453","display_name":"Chao Zhou","orcid":"https://orcid.org/0009-0004-1437-9573"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chao Zhou","raw_affiliation_strings":["SingularDance, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"SingularDance, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064372450","display_name":"Cheng Qiu","orcid":"https://orcid.org/0000-0002-1941-1614"},"institutions":[{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cheng Qiu","raw_affiliation_strings":["College of Arts and Science, Vanderbilt University, Nashville, TN, USA","The College of Arts and Science, Vanderbilt University, Nashville, TN, USA"],"affiliations":[{"raw_affiliation_string":"College of Arts and Science, Vanderbilt University, Nashville, TN, USA","institution_ids":["https://openalex.org/I200719446"]},{"raw_affiliation_string":"The College of Arts and Science, Vanderbilt University, Nashville, TN, USA","institution_ids":["https://openalex.org/I200719446"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042302841","display_name":"Lizhen Liang","orcid":"https://orcid.org/0000-0001-9329-2767"},"institutions":[{"id":"https://openalex.org/I70983195","display_name":"Syracuse University","ror":"https://ror.org/025r5qe02","country_code":"US","type":"education","lineage":["https://openalex.org/I70983195"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lizhen Liang","raw_affiliation_strings":["School of Information Science, Syracuse University, Syracuse, NY, USA","The School of Information Science, Syracuse University, Syracuse, NY, USA"],"affiliations":[{"raw_affiliation_string":"School of Information Science, Syracuse University, Syracuse, NY, USA","institution_ids":["https://openalex.org/I70983195"]},{"raw_affiliation_string":"The School of Information Science, Syracuse University, Syracuse, NY, USA","institution_ids":["https://openalex.org/I70983195"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069191647","display_name":"Daniel E. Acu\u00f1a","orcid":"https://orcid.org/0000-0002-7765-1595"},"institutions":[{"id":"https://openalex.org/I188538660","display_name":"University of Colorado Boulder","ror":"https://ror.org/02ttsq026","country_code":"US","type":"education","lineage":["https://openalex.org/I188538660"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel E. Acuna","raw_affiliation_strings":["Department of Computer Science, University of Colorado at Boulder, Boulder, CO, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Colorado at Boulder, Boulder, CO, USA","institution_ids":["https://openalex.org/I188538660"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100754453"],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":25.5728,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.99381826,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"13","issue":null,"first_page":"65797","last_page":"65822"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.933899998664856,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/paraphrase","display_name":"Paraphrase","score":0.9177025556564331},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7721256613731384},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6701520681381226},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6634650230407715},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.556576132774353},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5039030909538269},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.42242762446403503},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.34015804529190063}],"concepts":[{"id":"https://openalex.org/C2780922921","wikidata":"https://www.wikidata.org/wiki/Q255189","display_name":"Paraphrase","level":2,"score":0.9177025556564331},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7721256613731384},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6701520681381226},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6634650230407715},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.556576132774353},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5039030909538269},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42242762446403503},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.34015804529190063},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2025.3556899","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3556899","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:02cbd75a1f65445aa54a67759ac8ed6f","is_oa":true,"landing_page_url":"https://doaj.org/article/02cbd75a1f65445aa54a67759ac8ed6f","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 13, Pp 65797-65822 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2025.3556899","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3556899","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2262510621","display_name":null,"funder_award_id":"ORIIR180041","funder_id":"https://openalex.org/F4320306085","funder_display_name":"U.S. Department of Health and Human Services"},{"id":"https://openalex.org/G5427532021","display_name":null,"funder_award_id":"ORIIIR190049","funder_id":"https://openalex.org/F4320306085","funder_display_name":"U.S. Department of Health and Human Services"},{"id":"https://openalex.org/G5960220269","display_name":null,"funder_award_id":"ORIIIR210062","funder_id":"https://openalex.org/F4320306085","funder_display_name":"U.S. Department of Health and Human Services"},{"id":"https://openalex.org/G6072281559","display_name":null,"funder_award_id":"ORIIIR200052","funder_id":"https://openalex.org/F4320306085","funder_display_name":"U.S. Department of Health and Human Services"}],"funders":[{"id":"https://openalex.org/F4320306085","display_name":"U.S. Department of Health and Human Services","ror":"https://ror.org/033jnv181"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":158,"referenced_works":["https://openalex.org/W1498436455","https://openalex.org/W1603508585","https://openalex.org/W1965605789","https://openalex.org/W1967925097","https://openalex.org/W1967981232","https://openalex.org/W1990005915","https://openalex.org/W1993784968","https://openalex.org/W1995875735","https://openalex.org/W2016172157","https://openalex.org/W2032494091","https://openalex.org/W2054477000","https://openalex.org/W2078861931","https://openalex.org/W2081580037","https://openalex.org/W2088175415","https://openalex.org/W2100495367","https://openalex.org/W2104167780","https://openalex.org/W2104858828","https://openalex.org/W2110798204","https://openalex.org/W2117130368","https://openalex.org/W2118463056","https://openalex.org/W2119408773","https://openalex.org/W2130158090","https://openalex.org/W2146066482","https://openalex.org/W2147152072","https://openalex.org/W2148143831","https://openalex.org/W2153635508","https://openalex.org/W2157331557","https://openalex.org/W2158997610","https://openalex.org/W2229162816","https://openalex.org/W2250565861","https://openalex.org/W2265289447","https://openalex.org/W2293185259","https://openalex.org/W2581256107","https://openalex.org/W2593833795","https://openalex.org/W2773143256","https://openalex.org/W2792210162","https://openalex.org/W2808308446","https://openalex.org/W2882319491","https://openalex.org/W2889581211","https://openalex.org/W2893425640","https://openalex.org/W2904790185","https://openalex.org/W2963351448","https://openalex.org/W2963355447","https://openalex.org/W2963691377","https://openalex.org/W2964116568","https://openalex.org/W2964189376","https://openalex.org/W3010512657","https://openalex.org/W3014666486","https://openalex.org/W3034115845","https://openalex.org/W3044021949","https://openalex.org/W3094377605","https://openalex.org/W3094629756","https://openalex.org/W3101033885","https://openalex.org/W3102840413","https://openalex.org/W3212777043","https://openalex.org/W3214637961","https://openalex.org/W3217467900","https://openalex.org/W4210489841","https://openalex.org/W4286628214","https://openalex.org/W4312611635","https://openalex.org/W4383815588","https://openalex.org/W4385570752","https://openalex.org/W4389520401","https://openalex.org/W4392345938","https://openalex.org/W4392563948","https://openalex.org/W4399208420","https://openalex.org/W4401634806","https://openalex.org/W4402684133","https://openalex.org/W4402684250","https://openalex.org/W4404368749","https://openalex.org/W4405087843","https://openalex.org/W4406783820","https://openalex.org/W6601301254","https://openalex.org/W6603974228","https://openalex.org/W6605323724","https://openalex.org/W6629028937","https://openalex.org/W6629411485","https://openalex.org/W6633661181","https://openalex.org/W6635189695","https://openalex.org/W6636510571","https://openalex.org/W6675053379","https://openalex.org/W6675195607","https://openalex.org/W6675525543","https://openalex.org/W6675912799","https://openalex.org/W6678170489","https://openalex.org/W6678193448","https://openalex.org/W6678446668","https://openalex.org/W6678476229","https://openalex.org/W6679434410","https://openalex.org/W6679470533","https://openalex.org/W6680012447","https://openalex.org/W6680239946","https://openalex.org/W6680532216","https://openalex.org/W6680890276","https://openalex.org/W6681262288","https://openalex.org/W6681794102","https://openalex.org/W6681983134","https://openalex.org/W6682118243","https://openalex.org/W6683258052","https://openalex.org/W6684743669","https://openalex.org/W6684794618","https://openalex.org/W6685160515","https://openalex.org/W6688494211","https://openalex.org/W6691132426","https://openalex.org/W6691299368","https://openalex.org/W6691431627","https://openalex.org/W6691457240","https://openalex.org/W6691659672","https://openalex.org/W6696831472","https://openalex.org/W6697106991","https://openalex.org/W6703902474","https://openalex.org/W6720018875","https://openalex.org/W6728698659","https://openalex.org/W6729900021","https://openalex.org/W6730868226","https://openalex.org/W6732173219","https://openalex.org/W6737725434","https://openalex.org/W6739425704","https://openalex.org/W6739901393","https://openalex.org/W6741626902","https://openalex.org/W6742059102","https://openalex.org/W6743583902","https://openalex.org/W6746055214","https://openalex.org/W6748328696","https://openalex.org/W6751686549","https://openalex.org/W6751817261","https://openalex.org/W6751888212","https://openalex.org/W6754612000","https://openalex.org/W6755207826","https://openalex.org/W6761551260","https://openalex.org/W6765039553","https://openalex.org/W6768021236","https://openalex.org/W6769627184","https://openalex.org/W6771917389","https://openalex.org/W6776467053","https://openalex.org/W6778883912","https://openalex.org/W6778969436","https://openalex.org/W6784032159","https://openalex.org/W6784284637","https://openalex.org/W6785493357","https://openalex.org/W6789873034","https://openalex.org/W6801097528","https://openalex.org/W6803017251","https://openalex.org/W6846636818","https://openalex.org/W6846643419","https://openalex.org/W6848670183","https://openalex.org/W6849274154","https://openalex.org/W6850824531","https://openalex.org/W6852974784","https://openalex.org/W6853244435","https://openalex.org/W6858524923","https://openalex.org/W6861327541","https://openalex.org/W6861675364","https://openalex.org/W6862521569","https://openalex.org/W6873350679","https://openalex.org/W6874871274","https://openalex.org/W6898505805","https://openalex.org/W6940318546"],"related_works":["https://openalex.org/W2978707643","https://openalex.org/W2294233724","https://openalex.org/W4378713476","https://openalex.org/W2169813772","https://openalex.org/W2736149021","https://openalex.org/W4310803295","https://openalex.org/W2007563177","https://openalex.org/W4248451614","https://openalex.org/W1973985309","https://openalex.org/W3132357981"],"abstract_inverted_index":{"The":[0],"rapid":[1],"advancement":[2],"of":[3,34,65,136],"Natural":[4],"Language":[5,77],"Processing":[6],"(NLP)":[7],"has":[8],"greatly":[9],"improved":[10],"text-generation":[11],"tools":[12],"like":[13],"ChatGPT":[14],"and":[15,55,90,99,146,161,181,189],"Claude,":[16],"offering":[17],"significant":[18,124],"utility":[19],"but":[20],"also":[21],"posing":[22],"risks":[23],"to":[24,58,111,154,171,195],"media":[25],"credibility":[26],"through":[27],"paraphrased":[28],"plagiarism\u2014a":[29],"subtle":[30],"yet":[31],"widespread":[32],"form":[33],"content":[35],"misuse.":[36],"Despite":[37],"progress":[38],"in":[39,44,69,126],"automated":[40],"paraphrase":[41,59,67,97,127,198],"detection,":[42],"inconsistencies":[43],"training":[45,75,180],"datasets":[46],"often":[47],"limit":[48],"their":[49,147],"effectiveness.":[50],"This":[51],"study":[52],"examines":[53],"traditional":[54],"modern":[56],"approaches":[57],"identification,":[60],"revealing":[61],"how":[62],"the":[63,101,115,120,137],"under-representation":[64],"certain":[66],"types":[68],"widely-used":[70],"datasets,":[71],"including":[72],"those":[73],"for":[74,164,177,192],"Large":[76],"Models":[78],"(LLMs),":[79],"undermines":[80],"plagiarism":[81],"detection":[82],"accuracy.":[83],"To":[84],"address":[85,155],"these":[86],"issues,":[87],"we":[88,118,184],"introduce":[89],"validate":[91],"<sc":[92],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[93],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">ReParaphrased</small>,":[94],"a":[95,173],"refined":[96],"typology,":[98],"extend":[100],"Extended":[102],"Typology":[103],"Paraphrase":[104],"Corpus":[105],"(ETPC)":[106],"with":[107],"meticulous":[108],"manual":[109],"annotations":[110],"enhance":[112],"reliability.":[113],"Using":[114],"augmented":[116],"ETPC,":[117],"fine-tune":[119],"LLama3.1-7B-instruct":[121],"model,":[122],"uncovering":[123],"disparities":[125],"type":[128],"distribution":[129],"across":[130],"existing":[131],"datasets.":[132],"A":[133],"detailed":[134],"analysis":[135],"MRPC":[138],"benchmark":[139],"dataset":[140,156,166,193],"further":[141],"highlights":[142],"critical":[143],"distributional":[144],"issues":[145],"implications.":[148],"We":[149],"propose":[150],"four":[151],"key":[152],"solutions":[153],"limitations,":[157],"providing":[158],"both":[159],"theoretical":[160],"practical":[162],"guidance":[163],"improving":[165],"quality.":[167],"These":[168],"contributions":[169],"aim":[170],"establish":[172],"more":[174],"robust":[175],"foundation":[176],"NLP":[178],"model":[179],"evaluation.":[182],"Finally,":[183],"outline":[185],"future":[186],"research":[187],"directions":[188],"suggest":[190],"improvements":[191],"development":[194],"advance":[196],"AI-driven":[197],"detection.":[199]},"counts_by_year":[{"year":2025,"cited_by_count":9}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
