{"id":"https://openalex.org/W4406681929","doi":"https://doi.org/10.1186/s13636-024-00388-w","title":"A review on speech recognition approaches and challenges for Portuguese: exploring the feasibility of fine-tuning large-scale end-to-end models","display_name":"A review on speech recognition approaches and challenges for Portuguese: exploring the feasibility of fine-tuning large-scale end-to-end models","publication_year":2025,"publication_date":"2025-01-21","ids":{"openalex":"https://openalex.org/W4406681929","doi":"https://doi.org/10.1186/s13636-024-00388-w"},"language":"en","primary_location":{"id":"doi:10.1186/s13636-024-00388-w","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13636-024-00388-w","pdf_url":"https://asmp-eurasipjournals.springeropen.com/counter/pdf/10.1186/s13636-024-00388-w","source":{"id":"https://openalex.org/S19605986","display_name":"EURASIP Journal on Audio Speech and Music Processing","issn_l":"1687-4714","issn":["1687-4714","1687-4722"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Audio, Speech, and Music Processing","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://asmp-eurasipjournals.springeropen.com/counter/pdf/10.1186/s13636-024-00388-w","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101886099","display_name":"Yan Li","orcid":"https://orcid.org/0000-0003-3566-0992"},"institutions":[{"id":"https://openalex.org/I49835588","display_name":"Macao Polytechnic University","ror":"https://ror.org/02sf5td35","country_code":"MO","type":"education","lineage":["https://openalex.org/I49835588"]}],"countries":["MO"],"is_corresponding":true,"raw_author_name":"Yan Li","raw_affiliation_strings":["Faculty of Applied Sciences, Macao Polytechnic University, Macao, China"],"affiliations":[{"raw_affiliation_string":"Faculty of Applied Sciences, Macao Polytechnic University, Macao, China","institution_ids":["https://openalex.org/I49835588"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101760165","display_name":"Yapeng Wang","orcid":"https://orcid.org/0000-0002-1085-5091"},"institutions":[{"id":"https://openalex.org/I49835588","display_name":"Macao Polytechnic University","ror":"https://ror.org/02sf5td35","country_code":"MO","type":"education","lineage":["https://openalex.org/I49835588"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Yapeng Wang","raw_affiliation_strings":["Faculty of Applied Sciences, Macao Polytechnic University, Macao, China"],"affiliations":[{"raw_affiliation_string":"Faculty of Applied Sciences, Macao Polytechnic University, Macao, China","institution_ids":["https://openalex.org/I49835588"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078266731","display_name":"Lap Man Hoi","orcid":"https://orcid.org/0000-0002-1074-9846"},"institutions":[{"id":"https://openalex.org/I49835588","display_name":"Macao Polytechnic University","ror":"https://ror.org/02sf5td35","country_code":"MO","type":"education","lineage":["https://openalex.org/I49835588"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Lap Man Hoi","raw_affiliation_strings":["Faculty of Applied Sciences, Macao Polytechnic University, Macao, China"],"affiliations":[{"raw_affiliation_string":"Faculty of Applied Sciences, Macao Polytechnic University, Macao, China","institution_ids":["https://openalex.org/I49835588"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000883962","display_name":"Dingcheng Yang","orcid":"https://orcid.org/0000-0001-5313-4481"},"institutions":[{"id":"https://openalex.org/I141649914","display_name":"Nanchang University","ror":"https://ror.org/042v6xz23","country_code":"CN","type":"education","lineage":["https://openalex.org/I141649914"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dingcheng Yang","raw_affiliation_strings":["School of Information Engineering, Nanchang University, Nanchang, China"],"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Nanchang University, Nanchang, China","institution_ids":["https://openalex.org/I141649914"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022394682","display_name":"Sio\u2010Kei Im","orcid":"https://orcid.org/0000-0002-5599-4300"},"institutions":[{"id":"https://openalex.org/I49835588","display_name":"Macao Polytechnic University","ror":"https://ror.org/02sf5td35","country_code":"MO","type":"education","lineage":["https://openalex.org/I49835588"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Sio-Kei Im","raw_affiliation_strings":["Macao Polytechnic University, Macao, China"],"affiliations":[{"raw_affiliation_string":"Macao Polytechnic University, Macao, China","institution_ids":["https://openalex.org/I49835588"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101886099"],"corresponding_institution_ids":["https://openalex.org/I49835588"],"apc_list":{"value":1115,"currency":"GBP","value_usd":1367},"apc_paid":{"value":1115,"currency":"GBP","value_usd":1367},"fwci":4.8506,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.93684385,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"2025","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.9022475481033325},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8859710693359375},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5809869170188904},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5301098823547363},{"id":"https://openalex.org/keywords/end-user","display_name":"End user","score":0.44657108187675476},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3779340982437134},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.1554100215435028}],"concepts":[{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.9022475481033325},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8859710693359375},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5809869170188904},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5301098823547363},{"id":"https://openalex.org/C91262260","wikidata":"https://www.wikidata.org/wiki/Q528074","display_name":"End user","level":2,"score":0.44657108187675476},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3779340982437134},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.1554100215435028},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1186/s13636-024-00388-w","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13636-024-00388-w","pdf_url":"https://asmp-eurasipjournals.springeropen.com/counter/pdf/10.1186/s13636-024-00388-w","source":{"id":"https://openalex.org/S19605986","display_name":"EURASIP Journal on Audio Speech and Music Processing","issn_l":"1687-4714","issn":["1687-4714","1687-4722"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Audio, Speech, and Music Processing","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:eba8888c4d1c443091722512eab58292","is_oa":true,"landing_page_url":"https://doaj.org/article/eba8888c4d1c443091722512eab58292","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"EURASIP Journal on Audio, Speech, and Music Processing, Vol 2025, Iss 1, Pp 1-13 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s13636-024-00388-w","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13636-024-00388-w","pdf_url":"https://asmp-eurasipjournals.springeropen.com/counter/pdf/10.1186/s13636-024-00388-w","source":{"id":"https://openalex.org/S19605986","display_name":"EURASIP Journal on Audio Speech and Music Processing","issn_l":"1687-4714","issn":["1687-4714","1687-4722"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Audio, Speech, and Music Processing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4406681929.pdf","grobid_xml":"https://content.openalex.org/works/W4406681929.grobid-xml"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W49349007","https://openalex.org/W2136922672","https://openalex.org/W2139851371","https://openalex.org/W2154740140","https://openalex.org/W2169919054","https://openalex.org/W2279516409","https://openalex.org/W2294635046","https://openalex.org/W2327501763","https://openalex.org/W2792511626","https://openalex.org/W2898584349","https://openalex.org/W2912581782","https://openalex.org/W2946145984","https://openalex.org/W2962824709","https://openalex.org/W2963834942","https://openalex.org/W2973049979","https://openalex.org/W2992847832","https://openalex.org/W3001376538","https://openalex.org/W3010675254","https://openalex.org/W3081670364","https://openalex.org/W3094201484","https://openalex.org/W3095410713","https://openalex.org/W3095762785","https://openalex.org/W3096215352","https://openalex.org/W3100732527","https://openalex.org/W3135340187","https://openalex.org/W3160405885","https://openalex.org/W3172034247","https://openalex.org/W3197478142","https://openalex.org/W3197771105","https://openalex.org/W3212799896","https://openalex.org/W4210463634","https://openalex.org/W4221160950","https://openalex.org/W4221167707","https://openalex.org/W4226396073","https://openalex.org/W4307861474","https://openalex.org/W4309647876","https://openalex.org/W4312203699","https://openalex.org/W4319862635","https://openalex.org/W4319862667","https://openalex.org/W4367593865","https://openalex.org/W4376869734","https://openalex.org/W4391021530","https://openalex.org/W4391021666","https://openalex.org/W4392903956","https://openalex.org/W6600628372","https://openalex.org/W6600741150","https://openalex.org/W6600755281","https://openalex.org/W6600769105","https://openalex.org/W6605299328","https://openalex.org/W6818723395"],"related_works":["https://openalex.org/W4299590256","https://openalex.org/W2151749779","https://openalex.org/W4404782863","https://openalex.org/W3163634122","https://openalex.org/W2110442089","https://openalex.org/W2166381389","https://openalex.org/W4315785295","https://openalex.org/W4393280045","https://openalex.org/W2054736184","https://openalex.org/W3159728998"],"abstract_inverted_index":{"At":[0],"present,":[1],"automatic":[2],"speech":[3,22,49,78,107,162],"recognition":[4,23,50,64,79,90,108],"has":[5],"become":[6],"an":[7,110],"important":[8],"bridge":[9],"for":[10,100,173],"human-computer":[11],"interaction":[12],"and":[13,45,91,95,116,135,179],"is":[14,25,57],"widely":[15],"applied":[16],"in":[17,160],"multiple":[18],"fields.":[19],"The":[20,52],"Portuguese":[21,48,77,89,93,106,125,161],"task":[24],"gradually":[26],"receiving":[27],"attention":[28],"due":[29],"to":[30,60,127,156],"its":[31],"unique":[32],"language":[33],"stance.":[34],"However,":[35],"the":[36,43,61,70,114,129,152,157,174],"relatively":[37],"scarce":[38],"data":[39],"resources":[40,99],"have":[41],"constrained":[42],"development":[44],"application":[46],"of":[47,54,63,73,131,142,177],"systems.":[51,65],"neglect":[53],"accent":[55],"issues":[56],"also":[58],"detrimental":[59],"promotion":[62],"This":[66],"study":[67],"focuses":[68],"on":[69,76,121],"research":[71,84,167,178],"progress":[72],"end-to-end":[74],"technology":[75],"task.":[80],"It":[81],"discusses":[82],"relevant":[83],"from":[85],"two":[86],"directions:":[87],"Brazilian":[88],"European":[92,105,124],"recognition,":[94,163],"organizes":[96],"available":[97],"corpus":[98],"potential":[101],"researchers.":[102],"Then,":[103],"taking":[104],"as":[109,118],"example,":[111],"it":[112,164],"takes":[113],"Fairseq-S2T":[115],"Whisper":[117,138],"benchmarks":[119],"tested":[120],"a":[122,140],"500-h":[123],"dataset":[126],"estimate":[128],"performance":[130],"large-scale":[132],"pre-trained":[133],"models":[134],"fine-tuning":[136],"techniques.":[137],"obtained":[139],"WER":[141],"5.11%":[143],"which":[144,169],"indicates":[145],"that":[146],"multilingual":[147],"joint":[148],"training":[149],"can":[150],"enhance":[151],"generalization":[153],"ability.":[154],"Finally,":[155],"existing":[158],"problems":[159],"explores":[165],"future":[166],"directions,":[168],"provides":[170],"new":[171],"ideas":[172],"next":[175],"stage":[176],"system":[180],"construction.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
