{"id":"https://openalex.org/W4402993573","doi":"https://doi.org/10.3233/faia240428","title":"Pushing the Boundaries of Natural Language Processing (NLP): Enhancing Catalan Text Transcription Through Large-Scale Models in the Educational Field","display_name":"Pushing the Boundaries of Natural Language Processing (NLP): Enhancing Catalan Text Transcription Through Large-Scale Models in the Educational Field","publication_year":2024,"publication_date":"2024-09-25","ids":{"openalex":"https://openalex.org/W4402993573","doi":"https://doi.org/10.3233/faia240428"},"language":"en","primary_location":{"id":"doi:10.3233/faia240428","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3233/faia240428","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"http://dx.doi.org/10.3233/faia240428","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107666192","display_name":"Sergi Ramirez-Mitjans","orcid":"https://orcid.org/0000-0002-7782-3270"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES","US"],"is_corresponding":true,"raw_author_name":"Sergi Ramirez-Mitjans","raw_affiliation_strings":["Bachelor\u2019s degree in Artificial Intelligence at Universitat Polit\u00e8cnica de Catalunya (UPC)","Knowledge Engineering and Machine Learning group at Intelligent Data Science and Artificial Intelligence (IDEAI-UPC)"],"raw_orcid":"https://orcid.org/0000-0002-7782-3270","affiliations":[{"raw_affiliation_string":"Bachelor\u2019s degree in Artificial Intelligence at Universitat Polit\u00e8cnica de Catalunya (UPC)","institution_ids":["https://openalex.org/I9617848"]},{"raw_affiliation_string":"Knowledge Engineering and Machine Learning group at Intelligent Data Science and Artificial Intelligence (IDEAI-UPC)","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109792484","display_name":"Huilin Ni","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES","US"],"is_corresponding":false,"raw_author_name":"Huilin Ni","raw_affiliation_strings":["Bachelor\u2019s degree in Artificial Intelligence at Universitat Polit\u00e8cnica de Catalunya (UPC)","Knowledge Engineering and Machine Learning group at Intelligent Data Science and Artificial Intelligence (IDEAI-UPC)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bachelor\u2019s degree in Artificial Intelligence at Universitat Polit\u00e8cnica de Catalunya (UPC)","institution_ids":["https://openalex.org/I9617848"]},{"raw_affiliation_string":"Knowledge Engineering and Machine Learning group at Intelligent Data Science and Artificial Intelligence (IDEAI-UPC)","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107666193","display_name":"Jofre Mosegu\u00ed","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES","US"],"is_corresponding":false,"raw_author_name":"Jofre Mosegu\u00ed","raw_affiliation_strings":["Bachelor\u2019s degree in Artificial Intelligence at Universitat Polit\u00e8cnica de Catalunya (UPC)","Knowledge Engineering and Machine Learning group at Intelligent Data Science and Artificial Intelligence (IDEAI-UPC)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bachelor\u2019s degree in Artificial Intelligence at Universitat Polit\u00e8cnica de Catalunya (UPC)","institution_ids":["https://openalex.org/I9617848"]},{"raw_affiliation_string":"Knowledge Engineering and Machine Learning group at Intelligent Data Science and Artificial Intelligence (IDEAI-UPC)","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025867016","display_name":"J Rodriguez Puerta","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES","US"],"is_corresponding":false,"raw_author_name":"Javier Puerta","raw_affiliation_strings":["Bachelor\u2019s degree in Artificial Intelligence at Universitat Polit\u00e8cnica de Catalunya (UPC)","Knowledge Engineering and Machine Learning group at Intelligent Data Science and Artificial Intelligence (IDEAI-UPC)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bachelor\u2019s degree in Artificial Intelligence at Universitat Polit\u00e8cnica de Catalunya (UPC)","institution_ids":["https://openalex.org/I9617848"]},{"raw_affiliation_string":"Knowledge Engineering and Machine Learning group at Intelligent Data Science and Artificial Intelligence (IDEAI-UPC)","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109792485","display_name":"Marcel Vera","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES","US"],"is_corresponding":false,"raw_author_name":"Marcel Vera","raw_affiliation_strings":["Bachelor\u2019s degree in Artificial Intelligence at Universitat Polit\u00e8cnica de Catalunya (UPC)","Knowledge Engineering and Machine Learning group at Intelligent Data Science and Artificial Intelligence (IDEAI-UPC)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bachelor\u2019s degree in Artificial Intelligence at Universitat Polit\u00e8cnica de Catalunya (UPC)","institution_ids":["https://openalex.org/I9617848"]},{"raw_affiliation_string":"Knowledge Engineering and Machine Learning group at Intelligent Data Science and Artificial Intelligence (IDEAI-UPC)","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020268190","display_name":"Karina Gibert","orcid":"https://orcid.org/0000-0002-8542-3509"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES","US"],"is_corresponding":false,"raw_author_name":"Karina Gibert","raw_affiliation_strings":["Bachelor\u2019s degree in Artificial Intelligence at Universitat Polit\u00e8cnica de Catalunya (UPC)","Knowledge Engineering and Machine Learning group at Intelligent Data Science and Artificial Intelligence (IDEAI-UPC)"],"raw_orcid":"https://orcid.org/0000-0002-8542-3509","affiliations":[{"raw_affiliation_string":"Bachelor\u2019s degree in Artificial Intelligence at Universitat Polit\u00e8cnica de Catalunya (UPC)","institution_ids":["https://openalex.org/I9617848"]},{"raw_affiliation_string":"Knowledge Engineering and Machine Learning group at Intelligent Data Science and Artificial Intelligence (IDEAI-UPC)","institution_ids":["https://openalex.org/I1343180700"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5107666192"],"corresponding_institution_ids":["https://openalex.org/I1343180700","https://openalex.org/I9617848"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35323721,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9664000272750854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/catalan","display_name":"Catalan","score":0.8526095747947693},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.684525191783905},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6337753534317017},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6115747690200806},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.488724946975708},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.4882822036743164},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4411933124065399},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.2679067552089691},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.18857204914093018},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.13786956667900085},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08417516946792603},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.05739325284957886}],"concepts":[{"id":"https://openalex.org/C164105321","wikidata":"https://www.wikidata.org/wiki/Q7026","display_name":"Catalan","level":2,"score":0.8526095747947693},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.684525191783905},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6337753534317017},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6115747690200806},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.488724946975708},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.4882822036743164},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4411933124065399},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2679067552089691},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.18857204914093018},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.13786956667900085},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08417516946792603},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.05739325284957886},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3233/faia240428","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3233/faia240428","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},{"id":"pmh:oai:upcommons.upc.edu:2117/433067","is_oa":true,"landing_page_url":"https://hdl.handle.net/2117/433067","pdf_url":"https://upcommons.upc.edu/bitstreams/93fa1be5-9270-43a3-8bf5-90495bcf9f6d/download","source":{"id":"https://openalex.org/S4377196262","display_name":"UPCommons institutional repository (Universitat Polit\u00e8cnica de Catalunya)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9617848","host_organization_name":"Universitat Polit\u00e8cnica de Catalunya","host_organization_lineage":["https://openalex.org/I9617848"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"doi:10.3233/faia240428","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3233/faia240428","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320334830","display_name":"Ag\u00e8ncia de Gesti\u00f3 d'Ajuts Universitaris i de Recerca","ror":"https://ror.org/01n4pqe45"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2077845850","https://openalex.org/W2486167700","https://openalex.org/W613669934","https://openalex.org/W2904571626","https://openalex.org/W2893301572","https://openalex.org/W2945415727","https://openalex.org/W2946010871","https://openalex.org/W1494937920","https://openalex.org/W2078076122","https://openalex.org/W2097009648"],"abstract_inverted_index":{"This":[0],"work":[1],"delves":[2],"into":[3,132],"the":[4,13,19,42,53,58,63,69,76,99,119,168,172,184,189,194,197,206,209,226,255,277,280,305,315,323,328],"intricate":[5],"landscape":[6],"of":[7,15,21,57,71,78,101,129,156,208,258,279,297,322,327],"employing":[8],"large":[9],"language":[10,34,65],"models":[11],"for":[12,50,62],"transcription":[14,128,155],"Catalan":[16,30,64,94,105,130,133,158,190,213,252],"texts":[17],"within":[18],"realm":[20],"artificial":[22],"intelligence.":[23],"As":[24],"it":[25],"has":[26,36,234],"been":[27,236,312],"recently":[28],"stated,":[29],"is":[31,47,127],"a":[32,37,72,96,154,157,242,264],"medium":[33],"that":[35,135,233],"very":[38,274],"low":[39],"presence":[40,77],"in":[41,52,68,83,93,104,140,163,188,218,249,289],"digital":[43,74,86,91,102],"frameworks":[44,87],"and":[45,55,81,85,106,165,186,223,283,295,314,320,325],"there":[46],"still":[48,151,273],"space":[49],"improvement":[51],"generation":[54,100],"maturity":[56],"computational":[59,191],"linguistics":[60],"resources":[61,244],"[4].":[66],"Nowadays,":[67],"middle":[70],"disruptive":[73],"transformation,":[75],"all":[79],"cultures":[80],"languages":[82,220],"Internet":[84],"becomes":[88,95],"crucial.":[89],"Generating":[90],"contents":[92,103],"priority.":[97],"Prioritizing":[98],"being":[107],"able":[108],"to":[109,121,144,160,170,175,215,245,276],"use":[110],"Natural":[111],"Language":[112],"Processing":[113],"or":[114,196],"Speech":[115],"recognition":[116],"will":[117],"enlarge":[118],"impact":[120],"other":[122,141,176],"audiences.":[123],"A":[124],"central":[125],"activity":[126],"videos":[131,214],"subtitles":[134,162,173,217],"eventually":[136],"can":[137],"be":[138],"translated":[139,174],"languages.":[142,177],"Up":[143],"now,":[145],"channels":[146],"as":[147,149,269],"popular":[148],"YouTube":[150],"cannot":[152],"provide":[153],"video":[159,232],"generate":[161,216],"Catalan,":[164],"this":[166,202],"precludes":[167],"possibility":[169],"get":[171],"In":[178,201],"Catalonia,":[179],"some":[180,230,318],"specific":[181],"policies":[182],"promote":[183],"innovation":[185],"advances":[187],"resources,":[192],"like":[193],"[12],":[195],"AINA":[198],"project":[199],"[8].":[200],"work,":[203],"we":[204],"analyse":[205],"state":[207],"art":[210],"on":[211],"transcribing":[212,251],"three":[219],"(Catalan,":[221],"Spanish":[222,231],"English).":[224],"Eventually,":[225],"corpus":[227],"analysed":[228],"had":[229],"also":[235],"considered.":[237],"The":[238,302],"paper":[239,303],"rigorously":[240],"tests":[241],"few":[243],"discern":[246],"their":[247],"efficacy":[248],"accurately":[250],"text.":[253],"Despite":[254],"promising":[256],"capabilities":[257],"these":[259,270],"models,":[260],"our":[261],"findings":[262],"revealed":[263],"worse":[265],"performance":[266],"than":[267],"expected,":[268],"systems":[271],"are":[272,300],"sensitive":[275],"characteristics":[278],"speaker":[281],"voice":[282],"speech.":[284],"Common":[285],"challenges":[286],"included":[287],"difficulties":[288],"handling":[290],"Catalan-specific":[291],"diacritics,":[292],"idiosyncratic":[293],"vocabulary,":[294],"nuances":[296],"regional":[298],"dialects":[299],"identified.":[301],"describes":[304],"experimental":[306],"setting":[307],"where":[308],"several":[309],"tools":[310],"have":[311],"tested":[313,329],"results,":[316],"providing":[317],"conclusions":[319],"diagnosis":[321],"limitations":[324],"strengthnesses":[326],"tools.":[330]},"counts_by_year":[],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
