{"id":"https://openalex.org/W2922493889","doi":"https://doi.org/10.1109/istel.2018.8661095","title":"PerKey: A Persian News Corpus for Keyphrase Extraction and Generation","display_name":"PerKey: A Persian News Corpus for Keyphrase Extraction and Generation","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2922493889","doi":"https://doi.org/10.1109/istel.2018.8661095","mag":"2922493889"},"language":"en","primary_location":{"id":"doi:10.1109/istel.2018.8661095","is_oa":false,"landing_page_url":"https://doi.org/10.1109/istel.2018.8661095","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 9th International Symposium on Telecommunications (IST)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2009.12269","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Ehsan Doostmohammadi","orcid":null},"institutions":[{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]}],"countries":["IR"],"is_corresponding":true,"raw_author_name":"Ehsan Doostmohammadi","raw_affiliation_strings":["Computational Linguistics Group, Sharif University of Technology, Tehran, Iran"],"affiliations":[{"raw_affiliation_string":"Computational Linguistics Group, Sharif University of Technology, Tehran, Iran","institution_ids":["https://openalex.org/I133529467"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mohammad Hadi Bokaei","orcid":null},"institutions":[{"id":"https://openalex.org/I4210102178","display_name":"ICT Research Institute","ror":"https://ror.org/01a3g2z22","country_code":"IR","type":"facility","lineage":["https://openalex.org/I4210102178"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Mohammad Hadi Bokaei","raw_affiliation_strings":["Information Technology Department, ICT Research Institute, Tehran, Iran"],"affiliations":[{"raw_affiliation_string":"Information Technology Department, ICT Research Institute, Tehran, Iran","institution_ids":["https://openalex.org/I4210102178"]}]},{"author_position":"last","author":{"id":null,"display_name":"Hossein Sameti","orcid":null},"institutions":[{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Hossein Sameti","raw_affiliation_strings":["Computer Engineering Department, Sharif University of Technology, Tehran, Iran"],"affiliations":[{"raw_affiliation_string":"Computer Engineering Department, Sharif University of Technology, Tehran, Iran","institution_ids":["https://openalex.org/I133529467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I133529467"],"apc_list":null,"apc_paid":null,"fwci":0.1692,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.63669525,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"460","last_page":"465"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/persian","display_name":"Persian","score":0.824400007724762},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5181000232696533},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5069000124931335},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.4602999985218048},{"id":"https://openalex.org/keywords/keyword-extraction","display_name":"Keyword extraction","score":0.4156999886035919},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3928000032901764},{"id":"https://openalex.org/keywords/text-corpus","display_name":"Text corpus","score":0.3375999927520752}],"concepts":[{"id":"https://openalex.org/C2776527531","wikidata":"https://www.wikidata.org/wiki/Q9168","display_name":"Persian","level":2,"score":0.824400007724762},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7696999907493591},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7105000019073486},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6851999759674072},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5181000232696533},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5069000124931335},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.4602999985218048},{"id":"https://openalex.org/C2780288562","wikidata":"https://www.wikidata.org/wiki/Q25053353","display_name":"Keyword extraction","level":2,"score":0.4156999886035919},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3928000032901764},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.387800008058548},{"id":"https://openalex.org/C2474386","wikidata":"https://www.wikidata.org/wiki/Q461183","display_name":"Text corpus","level":2,"score":0.3375999927520752},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.30640000104904175},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.26170000433921814},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C3020001037","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assessment","level":3,"score":0.25519999861717224}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/istel.2018.8661095","is_oa":false,"landing_page_url":"https://doi.org/10.1109/istel.2018.8661095","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 9th International Symposium on Telecommunications (IST)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2009.12269","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2009.12269","pdf_url":"https://arxiv.org/pdf/2009.12269","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2009.12269","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2009.12269","pdf_url":"https://arxiv.org/pdf/2009.12269","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1983873791","https://openalex.org/W2026306693","https://openalex.org/W2030903088","https://openalex.org/W2036596285","https://openalex.org/W2045181608","https://openalex.org/W2062233052","https://openalex.org/W2066636486","https://openalex.org/W2083214110","https://openalex.org/W2084364456","https://openalex.org/W2156577800","https://openalex.org/W2167329753","https://openalex.org/W2566480286","https://openalex.org/W2789995212","https://openalex.org/W2790109590","https://openalex.org/W2792059528","https://openalex.org/W2963265326","https://openalex.org/W4255165398","https://openalex.org/W6601310731","https://openalex.org/W6631501603","https://openalex.org/W6675317306","https://openalex.org/W6683944584","https://openalex.org/W6691640376","https://openalex.org/W6732414893"],"related_works":[],"abstract_inverted_index":{"Keyphrases":[0],"provide":[1],"an":[2],"extremely":[3],"dense":[4],"summary":[5],"of":[6,47,65,106,114],"a":[7,48,63],"text.":[8],"Such":[9],"information":[10,22],"can":[11],"be":[12],"used":[13],"in":[14],"many":[15],"Natural":[16],"Language":[17],"Processing":[18],"tasks,":[19],"such":[20],"as":[21],"retrieval":[23],"and":[24,74,87,117,131],"text":[25],"summarization.":[26],"Since":[27],"previous":[28],"studies":[29],"on":[30,125],"Persian":[31,71],"keyword":[32],"or":[33],"keyphrase":[34,51],"extraction":[35],"have":[36],"not":[37],"published":[38],"their":[39],"data,":[40],"the":[41,45,104,107,112,126],"field":[42],"suffers":[43],"from":[44,69],"lack":[46],"human":[49,100],"extracted":[50,81],"dataset.":[52],"In":[53],"this":[54],"paper,":[55],"we":[56],"introduce":[57],"PerKey":[58],"<sup":[59],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[60,134],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[61],",":[62],"corpus":[64],"553k":[66],"news":[67,72],"articles":[68],"six":[70],"websites":[73],"agencies":[75],"with":[76],"relatively":[77],"high":[78],"quality":[79,92,105],"author":[80],"keyphrases,":[82],"which":[83],"is":[84],"then":[85],"filtered":[86],"cleaned":[88],"to":[89,102],"achieve":[90],"higher":[91],"keyphrases.":[93,108],"The":[94],"resulted":[95],"data":[96],"was":[97],"put":[98],"into":[99],"assessment":[101],"ensure":[103],"We":[109],"also":[110],"measured":[111],"performance":[113],"different":[115],"supervised":[116],"unsupervised":[118],"techniques,":[119],"e.g.":[120],"TFIDF,":[121],"MultipartiteRank,":[122],"KEA,":[123],"etc.":[124],"dataset":[127],"using":[128],"precision,":[129],"recall,":[130],"F":[132],"<sub":[133],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sub>":[135],"-score.":[136]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2019-03-22T00:00:00"}
