{"id":"https://openalex.org/W4387608615","doi":"https://doi.org/10.15439/2023f1609","title":"BIGOS - Benchmark Intended Grouping of Open Speech Corpora for Polish Automatic Speech Recognition","display_name":"BIGOS - Benchmark Intended Grouping of Open Speech Corpora for Polish Automatic Speech Recognition","publication_year":2023,"publication_date":"2023-09-26","ids":{"openalex":"https://openalex.org/W4387608615","doi":"https://doi.org/10.15439/2023f1609"},"language":"en","primary_location":{"id":"doi:10.15439/2023f1609","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2023f1609","pdf_url":"https://annals-csis.org/proceedings/2023/drp/pdf/1609.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://annals-csis.org/proceedings/2023/drp/pdf/1609.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093057114","display_name":"Micha\u0142 Junczyk","orcid":"https://orcid.org/0000-0002-7545-5152"},"institutions":[{"id":"https://openalex.org/I4210096386","display_name":"Bridge University","ror":"https://ror.org/00cbm0437","country_code":"SS","type":"education","lineage":["https://openalex.org/I4210096386"]}],"countries":["SS"],"is_corresponding":true,"raw_author_name":"Micha\u0142 Junczyk","raw_affiliation_strings":["University"],"affiliations":[{"raw_affiliation_string":"University","institution_ids":["https://openalex.org/I4210096386"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5093057114"],"corresponding_institution_ids":["https://openalex.org/I4210096386"],"apc_list":null,"apc_paid":null,"fwci":0.1119,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.31150077,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"35","issue":null,"first_page":"585","last_page":"590"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9211000204086304,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8057262897491455},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7614563703536987},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7018424272537231},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5786370038986206},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5191642045974731},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.4650363326072693},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.3010818362236023}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8057262897491455},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7614563703536987},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7018424272537231},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5786370038986206},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5191642045974731},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.4650363326072693},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.3010818362236023},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.15439/2023f1609","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2023f1609","pdf_url":"https://annals-csis.org/proceedings/2023/drp/pdf/1609.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:doaj.org/article:a37a6b314ba3439ab9271d953df7643a","is_oa":true,"landing_page_url":"https://doaj.org/article/a37a6b314ba3439ab9271d953df7643a","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Annals of computer science and information systems, Vol 35, Pp 585-590 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.15439/2023f1609","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2023f1609","pdf_url":"https://annals-csis.org/proceedings/2023/drp/pdf/1609.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387608615.pdf","grobid_xml":"https://content.openalex.org/works/W4387608615.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2005458850","https://openalex.org/W2148577181","https://openalex.org/W2300676179","https://openalex.org/W2620589393","https://openalex.org/W2805761224","https://openalex.org/W2892490883","https://openalex.org/W2973028582","https://openalex.org/W3029693800","https://openalex.org/W3030437843","https://openalex.org/W3089840143","https://openalex.org/W3095410713","https://openalex.org/W3101648800","https://openalex.org/W3106426924","https://openalex.org/W3158977306","https://openalex.org/W3162534420","https://openalex.org/W3184326107","https://openalex.org/W3193283619","https://openalex.org/W3197674197","https://openalex.org/W3206083773","https://openalex.org/W4210274724","https://openalex.org/W4211239472","https://openalex.org/W4221142121","https://openalex.org/W4221146809","https://openalex.org/W4223498868","https://openalex.org/W4280618630","https://openalex.org/W4286902103","https://openalex.org/W4307259952","https://openalex.org/W4307322847","https://openalex.org/W4311000453","https://openalex.org/W4391285994"],"related_works":["https://openalex.org/W4200068392","https://openalex.org/W2772686614","https://openalex.org/W2036933852","https://openalex.org/W2152945827","https://openalex.org/W2015513221","https://openalex.org/W2181773877","https://openalex.org/W301864623","https://openalex.org/W2757826346","https://openalex.org/W2184371793","https://openalex.org/W2110108310"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,11,53,84,99],"Benchmark":[4],"Intended":[5],"Grouping":[6],"of":[7,24,56,69,79,87,110,144],"Open":[8],"Speech":[9,18],"(BIGOS),":[10],"new":[12],"corpus":[13,135],"designed":[14],"for":[15,114,132],"Polish":[16],"Automatic":[17],"Recognition":[19],"(ASR)":[20],"systems.This":[21],"initial":[22],"version":[23],"the":[25,59,67,70,88,119,145],"benchmark":[26,125,146],"leverages":[27],"1,900":[28],"audio":[29],"recordings":[30,57],"from":[31,36],"71":[32],"distinct":[33],"speakers,":[34],"sourced":[35],"10":[37],"publicly":[38,149],"available":[39],"speech":[40],"corpora.Three":[41],"proprietary":[42],"ASR":[43,48,115],"systems":[44,49],"and":[45,58,118,136,142],"five":[46],"open-source":[47,72],"were":[50],"evaluated":[51],"on":[52,75,91,122],"diverse":[54],"set":[55],"corresponding":[60],"original":[61],"transcriptions.Interestingly,":[62],"it":[63],"was":[64,94],"found":[65],"that":[66,78,139],"performance":[68],"latest":[71],"models":[73],"is":[74],"par":[76],"with":[77,130],"more":[80],"established":[81],"commercial":[82],"services.Furthermore,":[83],"significant":[85],"influence":[86],"model":[89],"size":[90],"system":[92],"accuracy":[93],"observed,":[95],"as":[96,98],"well":[97],"decrease":[100],"in":[101],"scenarios":[102],"involving":[103],"highly":[104],"specialized":[105],"or":[106],"spontaneous":[107],"speech.The":[108],"challenges":[109],"using":[111],"public":[112],"datasets":[113],"evaluation":[116],"purposes":[117],"limitations":[120],"based":[121],"this":[123],"inaugural":[124],"are":[126,147],"critically":[127],"discussed,":[128],"along":[129],"recommendations":[131],"future":[133],"research.BIGOS":[134],"associated":[137],"tools":[138],"facilitate":[140],"replication":[141],"customization":[143],"made":[148],"available.":[150]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-10-10T00:00:00"}
