{"id":"https://openalex.org/W7127623387","doi":"https://doi.org/10.48550/arxiv.2602.03245","title":"Mi\u0107i Princ -- A Little Boy Teaching Speech Technologies the Chakavian Dialect","display_name":"Mi\u0107i Princ -- A Little Boy Teaching Speech Technologies the Chakavian Dialect","publication_year":2026,"publication_date":"2026-02-03","ids":{"openalex":"https://openalex.org/W7127623387","doi":"https://doi.org/10.48550/arxiv.2602.03245"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.03245","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.03245","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.03245","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124976041","display_name":"Nikola Ljube\u0161i\u0107","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ljube\u0161i\u0107, Nikola","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125046279","display_name":"Peter Rupnik","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rupnik, Peter","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125072829","display_name":"Tea Perin\u010di\u0107","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Perin\u010di\u0107, Tea","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5124976041"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13996","display_name":"Diverse Musicological Studies","score":0.16830000281333923,"subfield":{"id":"https://openalex.org/subfields/1210","display_name":"Music"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13996","display_name":"Diverse Musicological Studies","score":0.16830000281333923,"subfield":{"id":"https://openalex.org/subfields/1210","display_name":"Music"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.06469999998807907,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10751","display_name":"Forensic and Genetic Research","score":0.04729999974370003,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5727999806404114},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.5120999813079834},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4577000141143799},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.43209999799728394},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.4259999990463257},{"id":"https://openalex.org/keywords/speech-technology","display_name":"Speech technology","score":0.41769999265670776},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.3580999970436096},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.3124000132083893},{"id":"https://openalex.org/keywords/digital-audio","display_name":"Digital audio","score":0.31189998984336853}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6401000022888184},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5727999806404114},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.5120999813079834},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4577000141143799},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45500001311302185},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.43209999799728394},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.43149998784065247},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.4259999990463257},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.423799991607666},{"id":"https://openalex.org/C504749915","wikidata":"https://www.wikidata.org/wiki/Q9010971","display_name":"Speech technology","level":3,"score":0.41769999265670776},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36329999566078186},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.3580999970436096},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3199999928474426},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C87687168","wikidata":"https://www.wikidata.org/wiki/Q173114","display_name":"Digital audio","level":4,"score":0.31189998984336853},{"id":"https://openalex.org/C3018824978","wikidata":"https://www.wikidata.org/wiki/Q2894891","display_name":"Error analysis","level":2,"score":0.3118000030517578},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3093000054359436},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C2778370645","wikidata":"https://www.wikidata.org/wiki/Q5668585","display_name":"Note-taking","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.26429998874664307},{"id":"https://openalex.org/C154775046","wikidata":"https://www.wikidata.org/wiki/Q188","display_name":"German","level":2,"score":0.2596000134944916},{"id":"https://openalex.org/C2779855358","wikidata":"https://www.wikidata.org/wiki/Q7979","display_name":"British English","level":2,"score":0.2563000023365021},{"id":"https://openalex.org/C504583287","wikidata":"https://www.wikidata.org/wiki/Q1428637","display_name":"Spoken word","level":3,"score":0.25540000200271606},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.25380000472068787},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C128422554","wikidata":"https://www.wikidata.org/wiki/Q20077126","display_name":"Sound recording and reproduction","level":2,"score":0.2515000104904175},{"id":"https://openalex.org/C177454536","wikidata":"https://www.wikidata.org/wiki/Q578290","display_name":"Emphasis (telecommunications)","level":2,"score":0.25060001015663147},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.03245","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.03245","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.03245","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.03245","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.8014037609100342,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1,131],"documents":[2],"our":[3,66,229],"efforts":[4],"in":[5,91,267],"releasing":[6],"the":[7,13,16,23,32,35,39,45,70,77,81,84,88,92,102,114,124,135,159,161,166,186,200,251,258,261,264,268,272,276],"printed":[8,82],"and":[9,34,50,73,83,215,253],"audio":[10,36,85],"book":[11],"of":[12,15,38,47,80,104,185,196,202,211,245,260,263,275],"translation":[14],"famous":[17],"novel":[18],"The":[19,62,108,222],"Little":[20],"Prince":[21],"into":[22,240],"Chakavian":[24,149,277],"dialect,":[25],"as":[26,123,217,219],"a":[27,174,241],"computer-readable,":[28],"AI-ready":[29],"dataset,":[30,236],"with":[31,142,157],"textual":[33],"components":[37],"two":[40,183],"releases":[41],"now":[42,99,233],"aligned":[43],"on":[44,57,100,145,165,188,209],"level":[46],"each":[48],"written":[49],"spoken":[51],"word.":[52],"Our":[53],"motivation":[54,110,224],"for":[55,117,225],"working":[56],"this":[58,95,130,197,226,246],"release":[59,227],"is":[60,65,97,111,228],"multiple.":[61],"first":[63],"one":[64,125],"wish":[67],"to":[68,112,148,173,179,182,256],"preserve":[69],"highly":[71,234],"valuable":[72],"specific":[74],"content":[75,96],"beyond":[76,199,250],"small":[78],"editions":[79],"book.":[86],"With":[87],"dataset":[89,198],"published":[90],"CLARIN.SI":[93],"repository,":[94],"from":[98],"at":[101],"fingertips":[103],"any":[105],"interested":[106],"individual.":[107],"second":[109],"make":[113],"data":[115,169],"available":[116],"various":[118],"artificial-intelligence-related":[119],"usage":[120],"scenarios,":[121],"such":[122],"we":[126,177,204],"follow":[127],"upon":[128],"inside":[129],"already":[132,206],"--":[133],"adapting":[134,158],"Whisper-large-v3":[136],"open":[137],"automatic":[138],"speech":[139],"recognition":[140],"model,":[141,160],"decent":[143],"performance":[144],"standard":[146],"Croatian,":[147],"dialectal":[150,220],"speech.":[151],"We":[152,191],"can":[153],"happily":[154],"report":[155],"that":[156,231],"word":[162],"error":[163,187],"rate":[164],"selected":[167],"test":[168],"has":[170],"being":[171],"reduced":[172],"half,":[175],"while":[176],"managed":[178],"remove":[180],"up":[181],"thirds":[184],"character":[189],"level.":[190],"envision":[192],"many":[193],"more":[194],"usages":[195],"set":[201],"experiments":[203],"have":[205],"performed,":[207],"both":[208],"tasks":[210],"artificial":[212],"intelligence":[213],"research":[214,252],"application,":[216],"well":[218],"research.":[221],"third":[223],"hope":[230],"this,":[232],"structured":[235],"will":[237],"be":[238],"transformed":[239],"digital":[242],"online":[243],"edition":[244],"work,":[247],"allowing":[248],"individuals":[249],"technology":[254],"communities":[255],"enjoy":[257],"beauty":[259],"message":[262],"little":[265],"boy":[266],"desert,":[269],"told":[270],"through":[271],"spectacular":[273],"prism":[274],"dialect.":[278]},"counts_by_year":[],"updated_date":"2026-02-06T02:05:47.483045","created_date":"2026-02-06T00:00:00"}
