{"id":"https://openalex.org/W2092438018","doi":"https://doi.org/10.1145/2371316.2371336","title":"A software tool for building a statistical prefix processor","display_name":"A software tool for building a statistical prefix processor","publication_year":2012,"publication_date":"2012-09-16","ids":{"openalex":"https://openalex.org/W2092438018","doi":"https://doi.org/10.1145/2371316.2371336","mag":"2092438018"},"language":"en","primary_location":{"id":"doi:10.1145/2371316.2371336","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2371316.2371336","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fifth Balkan Conference in Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081787499","display_name":"Nikitas \u039d. Karanikolas","orcid":"https://orcid.org/0000-0003-1777-892X"},"institutions":[{"id":"https://openalex.org/I40479246","display_name":"Technological Educational Institute of Athens","ror":"https://ror.org/044m46d61","country_code":"GR","type":"education","lineage":["https://openalex.org/I40479246"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Nikitas Karanikolas","raw_affiliation_strings":["TEI of Athens, Aigaleo, Greece"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"TEI of Athens, Aigaleo, Greece","institution_ids":["https://openalex.org/I40479246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046792635","display_name":"Michael Vassilakopoulos","orcid":"https://orcid.org/0000-0003-2256-5523"},"institutions":[{"id":"https://openalex.org/I179161674","display_name":"Technological Educational Institute of Central Greece","ror":"https://ror.org/03ed9w591","country_code":"GR","type":"education","lineage":["https://openalex.org/I179161674"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Michael Vassilakopoulos","raw_affiliation_strings":["University of Central Greece, Lamia, Greece"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Central Greece, Lamia, Greece","institution_ids":["https://openalex.org/I179161674"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025039145","display_name":"Nektarios Giannoulis","orcid":null},"institutions":[{"id":"https://openalex.org/I40479246","display_name":"Technological Educational Institute of Athens","ror":"https://ror.org/044m46d61","country_code":"GR","type":"education","lineage":["https://openalex.org/I40479246"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Nektarios Giannoulis","raw_affiliation_strings":["TEI of Athens, Aigaleo, Greece"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"TEI of Athens, Aigaleo, Greece","institution_ids":["https://openalex.org/I40479246"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.435,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.73458836,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"100","last_page":"105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prefix","display_name":"Prefix","score":0.9348273873329163},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8332052230834961},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6916565895080566},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6537314057350159},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.6423682570457458},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5197107791900635},{"id":"https://openalex.org/keywords/suffix","display_name":"Suffix","score":0.45418867468833923},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.44961753487586975},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.4332399368286133},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.43315353989601135},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3696230351924896},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1951574683189392},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11257973313331604},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10387080907821655}],"concepts":[{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.9348273873329163},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8332052230834961},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6916565895080566},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6537314057350159},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.6423682570457458},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5197107791900635},{"id":"https://openalex.org/C2779804580","wikidata":"https://www.wikidata.org/wiki/Q102047","display_name":"Suffix","level":2,"score":0.45418867468833923},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.44961753487586975},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.4332399368286133},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43315353989601135},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3696230351924896},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1951574683189392},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11257973313331604},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10387080907821655},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2371316.2371336","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2371316.2371336","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fifth Balkan Conference in Informatics","raw_type":"proceedings-article"},{"id":"pmh:oai:ir.lib.uth.gr:11615/29058","is_oa":false,"landing_page_url":"http://hdl.handle.net/11615/29058","pdf_url":null,"source":{"id":"https://openalex.org/S4306400243","display_name":"University of Thessaly Institutional Repository (University of Thessaly)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I145722265","host_organization_name":"University of Thessaly","host_organization_lineage":["https://openalex.org/I145722265"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.scopus.com/inward/record.url?eid=2-s2.0-84867373036&amp;partnerID=40&amp;md5=ec25ac272f0788bb6839a86414a70f00","raw_type":"conferenceItem"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1500117362","https://openalex.org/W1956559956","https://openalex.org/W2018927814","https://openalex.org/W2046732239","https://openalex.org/W2098162425","https://openalex.org/W2107585977","https://openalex.org/W2112924836","https://openalex.org/W2118020653"],"related_works":["https://openalex.org/W2022951409","https://openalex.org/W4385652842","https://openalex.org/W3196825089","https://openalex.org/W4368367671","https://openalex.org/W2413189204","https://openalex.org/W3012663989","https://openalex.org/W4318200119","https://openalex.org/W2773803767","https://openalex.org/W4381095475","https://openalex.org/W2356412146"],"abstract_inverted_index":{"Information":[0],"Retrieval":[1],"or":[2],"Text":[3],"Classification":[4],"need":[5],"to":[6,86],"match":[7],"words":[8,23,30,134,161,185],"between":[9,44],"the":[10,14,59,63,73,93,105,118,155,170,180,192,200],"user's":[11],"input":[12],"and":[13,34,56,107,135,190,218],"documents":[15],"in":[16,76],"a":[17,26,67,110,122,126,138,163,173],"collection":[18,164,202],"of":[19,22,66,109,121,141,151,160,165,175],"texts.":[20],"Matching":[21],"is":[24,149],"not":[25],"trivial":[27],"process":[28,179],"since":[29],"have":[31],"grammatical":[32],"(inflectional":[33],"derivational)":[35],"variations.":[36],"There":[37],"are":[38,167],"two":[39,152],"main":[40],"approaches":[41,71],"for":[42,214],"matching":[43],"inflected":[45,60],"words:":[46],"Stemming":[47],"(removing":[48],"word":[49,74,90,98,217],"suffixes":[50],"based":[51],"on":[52,89],"ad-hoc":[53],"selected":[54],"suffixes)":[55],"Lemmatizing":[57],"(replacing":[58],"form":[61,65],"with":[62,195],"base":[64],"word).":[68],"However,":[69],"these":[70],"normalize":[72],"variations":[75],"their":[77],"rightmost":[78],"side.":[79],"We":[80],"claim":[81],"it":[82],"will":[83],"be":[84,115,212],"beneficial":[85],"additionally":[87],"concentrate":[88],"normalization":[91],"at":[92],"left":[94],"side,":[95],"by":[96,206],"removing":[97],"prefixes.":[99,221],"In":[100],"this":[101],"report,":[102],"we":[103,147],"present":[104,148],"architecture":[106],"functioning":[108],"software":[111],"tool":[112,146],"that":[113,128,186],"can":[114,211],"used":[116,194],"as":[117,137],"first":[119,156],"stage":[120,140],"Statistical":[123],"Prefix":[124],"Processor,":[125],"system":[127],"could":[129],"effectively":[130],"remove":[131],"prefixes":[132,159,193],"from":[133],"act":[136],"preprocessing":[139],"text":[142,181,201],"analysis":[143],"applications.":[144],"The":[145],"comprised":[150],"stages/subtools.":[153],"During":[154,169],"stage,":[157,172],"possible":[158],"within":[162],"texts":[166],"identified.":[168],"second":[171],"number":[174],"users":[176],"(native":[177],"speakers)":[178],"collection,":[182],"automatically":[183],"locate":[184],"contain":[187],"each":[188,196,215],"stem":[189],"characterize":[191],"stemmed":[197,216],"word.":[198],"After":[199],"has":[203],"been":[204],"processed":[205],"all":[207],"users,":[208],"statistical":[209],"conclusions":[210],"drawn":[213],"its":[219],"associated":[220]},"counts_by_year":[{"year":2013,"cited_by_count":1}],"updated_date":"2026-07-02T09:51:11.867554","created_date":"2025-10-10T00:00:00"}
