{"id":"https://openalex.org/W2773854455","doi":"https://doi.org/10.1631/fitee.1601344","title":"A machine learning approach to query generation in plagiarism source retrieval","display_name":"A machine learning approach to query generation in plagiarism source retrieval","publication_year":2017,"publication_date":"2017-10-01","ids":{"openalex":"https://openalex.org/W2773854455","doi":"https://doi.org/10.1631/fitee.1601344","mag":"2773854455"},"language":"en","primary_location":{"id":"doi:10.1631/fitee.1601344","is_oa":false,"landing_page_url":"https://doi.org/10.1631/fitee.1601344","pdf_url":null,"source":{"id":"https://openalex.org/S4210189857","display_name":"Frontiers of Information Technology & Electronic Engineering","issn_l":"2095-9184","issn":["2095-9184","2095-9230"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers of Information Technology &amp; Electronic Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090227237","display_name":"Leilei Kong","orcid":"https://orcid.org/0000-0002-4636-3507"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]},{"id":"https://openalex.org/I4210161462","display_name":"Heilongjiang Institute of Technology","ror":"https://ror.org/05x0m9n95","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210161462"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lei-lei Kong","raw_affiliation_strings":["College of Information and Communication Engineering, Harbin Engineering University, Harbin, 150001, China","School of Computer Science and Technology, Heilongjiang Institute of Technology, Harbin, 150050, China"],"affiliations":[{"raw_affiliation_string":"College of Information and Communication Engineering, Harbin Engineering University, Harbin, 150001, China","institution_ids":["https://openalex.org/I151727225"]},{"raw_affiliation_string":"School of Computer Science and Technology, Heilongjiang Institute of Technology, Harbin, 150050, China","institution_ids":["https://openalex.org/I4210161462"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029456452","display_name":"Zhimao Lu","orcid":null},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi-mao Lu","raw_affiliation_strings":["College of Information and Communication Engineering, Harbin Engineering University, Harbin, 150001, China"],"affiliations":[{"raw_affiliation_string":"College of Information and Communication Engineering, Harbin Engineering University, Harbin, 150001, China","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042722970","display_name":"Haoliang Qi","orcid":"https://orcid.org/0000-0003-1321-5820"},"institutions":[{"id":"https://openalex.org/I4210161462","display_name":"Heilongjiang Institute of Technology","ror":"https://ror.org/05x0m9n95","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210161462"]},{"id":"https://openalex.org/I4391767988","display_name":"State Key Laboratory of Digital Publishing Technology","ror":"https://ror.org/021s6g098","country_code":null,"type":"facility","lineage":["https://openalex.org/I20231570","https://openalex.org/I4391767988"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao-liang Qi","raw_affiliation_strings":["School of Computer Science and Technology, Heilongjiang Institute of Technology, Harbin, 150050, China","State Key Laboratory of Digital Publishing Technology, Beijing 100871, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Heilongjiang Institute of Technology, Harbin, 150050, China","institution_ids":["https://openalex.org/I4210161462"]},{"raw_affiliation_string":"State Key Laboratory of Digital Publishing Technology, Beijing 100871, China","institution_ids":["https://openalex.org/I4391767988"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017588481","display_name":"Zhongyuan Han","orcid":"https://orcid.org/0000-0001-8960-9872"},"institutions":[{"id":"https://openalex.org/I4210161462","display_name":"Heilongjiang Institute of Technology","ror":"https://ror.org/05x0m9n95","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210161462"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhong-yuan Han","raw_affiliation_strings":["School of Computer Science and Technology, Heilongjiang Institute of Technology, Harbin, 150050, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Heilongjiang Institute of Technology, Harbin, 150050, China","institution_ids":["https://openalex.org/I4210161462"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5090227237"],"corresponding_institution_ids":["https://openalex.org/I151727225","https://openalex.org/I4210161462"],"apc_list":null,"apc_paid":null,"fwci":0.195,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.64037453,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"18","issue":"10","first_page":"1556","last_page":"1572"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8699606657028198},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.7198123335838318},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.707123875617981},{"id":"https://openalex.org/keywords/plagiarism-detection","display_name":"Plagiarism detection","score":0.6912661790847778},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.6473259329795837},{"id":"https://openalex.org/keywords/learning-to-rank","display_name":"Learning to rank","score":0.5181006789207458},{"id":"https://openalex.org/keywords/query-expansion","display_name":"Query expansion","score":0.5080838203430176},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.4981880187988281},{"id":"https://openalex.org/keywords/document-retrieval","display_name":"Document retrieval","score":0.4876362383365631},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.44279226660728455},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36399102210998535},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35166388750076294}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8699606657028198},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.7198123335838318},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.707123875617981},{"id":"https://openalex.org/C2780907237","wikidata":"https://www.wikidata.org/wiki/Q2986238","display_name":"Plagiarism detection","level":2,"score":0.6912661790847778},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.6473259329795837},{"id":"https://openalex.org/C86037889","wikidata":"https://www.wikidata.org/wiki/Q4330127","display_name":"Learning to rank","level":3,"score":0.5181006789207458},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.5080838203430176},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.4981880187988281},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.4876362383365631},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.44279226660728455},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36399102210998535},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35166388750076294},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1631/fitee.1601344","is_oa":false,"landing_page_url":"https://doi.org/10.1631/fitee.1601344","pdf_url":null,"source":{"id":"https://openalex.org/S4210189857","display_name":"Frontiers of Information Technology & Electronic Engineering","issn_l":"2095-9184","issn":["2095-9184","2095-9230"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers of Information Technology &amp; Electronic Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4565661271","display_name":null,"funder_award_id":"61370170","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1480376833","https://openalex.org/W1554944419","https://openalex.org/W1970222407","https://openalex.org/W1974336599","https://openalex.org/W1996430422","https://openalex.org/W2009571180","https://openalex.org/W2041615247","https://openalex.org/W2047221353","https://openalex.org/W2067802667","https://openalex.org/W2076470289","https://openalex.org/W2119821739","https://openalex.org/W2125398996","https://openalex.org/W2147528976","https://openalex.org/W2147742901","https://openalex.org/W2251179870","https://openalex.org/W2319551453","https://openalex.org/W2343570277","https://openalex.org/W2400384985","https://openalex.org/W2400796284","https://openalex.org/W2402211003","https://openalex.org/W2405094145","https://openalex.org/W2407376471","https://openalex.org/W2407707865","https://openalex.org/W2408382702","https://openalex.org/W2988119488","https://openalex.org/W3040704318","https://openalex.org/W4241676240","https://openalex.org/W4285719527","https://openalex.org/W6600045125","https://openalex.org/W6600157343","https://openalex.org/W6603698848"],"related_works":["https://openalex.org/W3127142483","https://openalex.org/W4385565564","https://openalex.org/W2898073868","https://openalex.org/W2138488530","https://openalex.org/W2798835721","https://openalex.org/W2971071571","https://openalex.org/W2387658907","https://openalex.org/W2922169395","https://openalex.org/W2385796165","https://openalex.org/W25098770"],"abstract_inverted_index":{"Plagiarism":[0],"source":[1,46,82,90,147,162,205,228,247,278],"retrieval":[2,91,148,163,229,248,279],"is":[3,37,149,188,230],"the":[4,13,20,28,40,56,71,95,112,115,121,132,136,160,181,189,199,209,222,239,243,253,256],"core":[5],"task":[6],"of":[7,39,97,114,201,212,215,224,238],"plagiarism":[8,16,29,45],"detection.":[9],"It":[10],"has":[11,62],"become":[12],"standard":[14],"for":[15,81,89,123,146,165,204,218,227],"detection":[17],"to":[18,26,101,130,143,158,175,177,192,197,220,252],"use":[19],"queries":[21,32,80,134,179],"extracted":[22],"from":[23,33,135,180],"suspicious":[24,35,75,167],"documents":[25],"retrieve":[27],"sources.":[30],"Generating":[31],"a":[34,124,152],"document":[36,76,168],"one":[38,68],"most":[41],"important":[42],"steps":[43],"in":[44,55,103,277],"retrieval.":[47,83,206],"Heuristic-based":[48],"query":[49,144,202,264],"generation":[50,145,203,265],"methods":[51,88,108,196],"are":[52],"widely":[53],"used":[54],"current":[57],"research.":[58],"Each":[59],"heuristic-based":[60],"method":[61,172,241,266],"its":[63],"own":[64],"advantages,":[65],"and":[66],"no":[67],"statistically":[69,272],"outperforms":[70],"others":[72],"on":[73,86,94,242,268],"all":[74],"segments":[77],"when":[78],"generating":[79],"Further":[84],"improvements":[85,274],"heuristic":[87,107],"rely":[92],"mainly":[93],"experience":[96],"experts.":[98],"This":[99,118],"leads":[100],"difficulties":[102],"putting":[104],"forward":[105],"new":[106,125],"that":[109,260],"can":[110],"overcome":[111],"shortcomings":[113],"existing":[116],"ones.":[117],"paper":[119],"paves":[120],"way":[122],"statistical":[126,139],"machine":[127,140,194,269],"learning":[128,141,174,195,219,270],"approach":[129,142],"select":[131],"best":[133],"candidates.":[137,182],"The":[138,170],"formulated":[150],"as":[151],"ranking":[153],"framework.":[154],"Specifically,":[155],"it":[156],"aims":[157],"achieve":[159],"optimal":[161],"performance":[164],"each":[166],"segment.":[169],"proposed":[171,240,263],"exploits":[173],"rank":[176],"generate":[178],"To":[183,207],"our":[184,186,262],"knowledge,":[185],"work":[187],"first":[190],"research":[191],"apply":[193],"resolve":[198],"problem":[200,211],"solve":[208],"essential":[210],"an":[213],"absence":[214],"training":[216,225],"data":[217],"rank,":[221],"building":[223],"samples":[226],"also":[231],"conducted.":[232],"We":[233],"rigorously":[234],"evaluate":[235],"various":[236],"aspects":[237],"publicly":[244],"available":[245],"PAN":[246],"corpus.":[249],"With":[250],"respect":[251],"established":[254],"baselines,":[255],"experimental":[257],"results":[258],"show":[259],"applying":[261],"based":[267],"yields":[271],"significant":[273],"over":[275],"baselines":[276],"effectiveness.":[280]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
