{"id":"https://openalex.org/W2757554342","doi":"https://doi.org/10.15439/2017f432","title":"Big Data Language Model of Contemporary Polish","display_name":"Big Data Language Model of Contemporary Polish","publication_year":2017,"publication_date":"2017-09-24","ids":{"openalex":"https://openalex.org/W2757554342","doi":"https://doi.org/10.15439/2017f432","mag":"2757554342"},"language":"en","primary_location":{"id":"doi:10.15439/2017f432","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2017f432","pdf_url":"https://annals-csis.org/proceedings/2017/drp/pdf/432.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://annals-csis.org/proceedings/2017/drp/pdf/432.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030149703","display_name":"Krzysztof Wo\u0142k","orcid":"https://orcid.org/0000-0001-5030-334X"},"institutions":[{"id":"https://openalex.org/I3017851245","display_name":"Polish-Japanese Academy of Information Technology","ror":"https://ror.org/01v542j61","country_code":"PL","type":"education","lineage":["https://openalex.org/I3017851245"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Krzysztof Wo\u0142k","raw_affiliation_strings":["Polish-Japanese Academy of Information Technology, ul. Koszykowa 86, 02-008 Warszawa, Poland"],"affiliations":[{"raw_affiliation_string":"Polish-Japanese Academy of Information Technology, ul. Koszykowa 86, 02-008 Warszawa, Poland","institution_ids":["https://openalex.org/I3017851245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009169567","display_name":"Agnieszka Wo\u0142k","orcid":"https://orcid.org/0000-0002-9667-2068"},"institutions":[{"id":"https://openalex.org/I3017851245","display_name":"Polish-Japanese Academy of Information Technology","ror":"https://ror.org/01v542j61","country_code":"PL","type":"education","lineage":["https://openalex.org/I3017851245"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Agnieszka Wo\u0142k","raw_affiliation_strings":["Polish-Japanese Academy of Information Technology, ul. Koszykowa 86, 02-008 Warszawa, Poland"],"affiliations":[{"raw_affiliation_string":"Polish-Japanese Academy of Information Technology, ul. Koszykowa 86, 02-008 Warszawa, Poland","institution_ids":["https://openalex.org/I3017851245"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031351366","display_name":"Krzysztof Marasek","orcid":"https://orcid.org/0000-0003-1344-3524"},"institutions":[{"id":"https://openalex.org/I3017851245","display_name":"Polish-Japanese Academy of Information Technology","ror":"https://ror.org/01v542j61","country_code":"PL","type":"education","lineage":["https://openalex.org/I3017851245"]}],"countries":["PL"],"is_corresponding":true,"raw_author_name":"Krzysztof Marasek","raw_affiliation_strings":["Polish-Japanese Academy of Information Technology, ul. Koszykowa 86, 02-008 Warszawa, Poland"],"affiliations":[{"raw_affiliation_string":"Polish-Japanese Academy of Information Technology, ul. Koszykowa 86, 02-008 Warszawa, Poland","institution_ids":["https://openalex.org/I3017851245"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5031351366"],"corresponding_institution_ids":["https://openalex.org/I3017851245"],"apc_list":null,"apc_paid":null,"fwci":0.2077,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.54345464,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"11","issue":null,"first_page":"389","last_page":"395"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12658","display_name":"Language and Culture","score":0.9729999899864197,"subfield":{"id":"https://openalex.org/subfields/3310","display_name":"Linguistics and Language"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13062","display_name":"Cognitive Computing and Networks","score":0.9689000248908997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5991496443748474},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.5946478247642517},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4456751346588135},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4008331894874573},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3275109827518463},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.32045963406562805},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1778460144996643},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.10518360137939453},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.09578284621238708}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5991496443748474},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.5946478247642517},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4456751346588135},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4008331894874573},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3275109827518463},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.32045963406562805},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1778460144996643},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.10518360137939453},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.09578284621238708}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.15439/2017f432","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2017f432","pdf_url":"https://annals-csis.org/proceedings/2017/drp/pdf/432.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.15439/2017f432","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2017f432","pdf_url":"https://annals-csis.org/proceedings/2017/drp/pdf/432.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7799999713897705,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2757554342.pdf","grobid_xml":"https://content.openalex.org/works/W2757554342.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W46893310","https://openalex.org/W127782651","https://openalex.org/W1631260214","https://openalex.org/W1742951243","https://openalex.org/W2012557190","https://openalex.org/W2013540053","https://openalex.org/W2079735306","https://openalex.org/W2097927681","https://openalex.org/W2101105183","https://openalex.org/W2109664771","https://openalex.org/W2115081467","https://openalex.org/W2119202242","https://openalex.org/W2122429665","https://openalex.org/W2124807415","https://openalex.org/W2130450156","https://openalex.org/W2133473895","https://openalex.org/W2145833060","https://openalex.org/W2150417504","https://openalex.org/W2158195707","https://openalex.org/W2251994258","https://openalex.org/W2270190199","https://openalex.org/W2329003026","https://openalex.org/W2595715041","https://openalex.org/W2916285486","https://openalex.org/W3204668696","https://openalex.org/W4285719527","https://openalex.org/W6636811518","https://openalex.org/W6676373471","https://openalex.org/W6679904954","https://openalex.org/W6898505805","https://openalex.org/W7044042833"],"related_works":["https://openalex.org/W4322629366","https://openalex.org/W2808989540","https://openalex.org/W2397053934","https://openalex.org/W1039292361","https://openalex.org/W2551093110","https://openalex.org/W4237919137","https://openalex.org/W3184179822","https://openalex.org/W3095362084","https://openalex.org/W2148016376","https://openalex.org/W3003361536"],"abstract_inverted_index":{"Based":[0],"on":[1,16,129,137],"big":[2,108],"data":[3,109],"training":[4],"we":[5],"provide":[6,76,89],"5-gram":[7],"language":[8,110,143],"models":[9],"of":[10,25,56,80,91,149,160],"contemporary":[11,92],"Polish":[12],"which":[13,131],"are":[14],"based":[15],"the":[17,32,45,120,142,158],"Common":[18],"Crawl":[19],"corpus":[20,82,85,121],"(which":[21],"is":[22,42,116],"a":[23,127],"compilation":[24],"more":[26],"than":[27,44],"9,000,000,000":[28],"pages":[29],"from":[30],"across":[31],"web)":[33],"and":[34,50,58,66,83,124,145,154],"other":[35],"resources.":[36],"We":[37,74,87,140],"prove":[38],"that":[39],"our":[40,161],"model":[41,62,144],"better":[43,52],"Google":[46],"WEB1T":[47],"n-gram":[48],"counts":[49],"assures":[51],"quality":[53],"in":[54,69,99,104,152],"terms":[55],"perplexity":[57,155],"machine":[59],"translation.":[60],"The":[61],"includes":[63],"lower-counting":[64],"entries":[65],"also":[67,75,88],"de-duplication":[68],"order":[70,105],"to":[71,106],"lessen":[72],"boilerplate.":[73],"POS":[77],"tagged":[78],"version":[79],"raw":[81,84],"itself.":[86],"dictionary":[90],"Polish.":[93],"By":[94],"maintaining":[95],"singletons,":[96],"Kneser-Ney":[97],"smoothing":[98],"SRILM":[100],"toolkit":[101],"was":[102,122],"used":[103],"construct":[107],"models.":[111],"In":[112],"this":[113,138],"research,":[114],"it":[115],"detailed":[117],"exactly":[118],"how":[119],"obtained":[123],"pre-processed,":[125],"with":[126,135],"prominence":[128],"issues":[130],"surface":[132],"when":[133],"working":[134],"information":[136],"scale.":[139],"train":[141],"finally":[146],"present":[147],"advances":[148],"BLEU":[150],"score":[151],"MT":[153],"values,":[156],"through":[157],"utilization":[159],"model.":[162]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
