{"id":"https://openalex.org/W4403510311","doi":"https://doi.org/10.1109/access.2024.3482970","title":"Enhancing Automatic Speech Recognition: Effects of Semantic Audio Filtering on Models Performance","display_name":"Enhancing Automatic Speech Recognition: Effects of Semantic Audio Filtering on Models Performance","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4403510311","doi":"https://doi.org/10.1109/access.2024.3482970"},"language":"en","primary_location":{"id":"doi:10.1109/access.2024.3482970","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3482970","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2024.3482970","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114312941","display_name":"Yuriy Perezhohin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210106265","display_name":"Instituto Nacional de Administra\u00e7\u00e3o, I. P.","ror":"https://ror.org/01h394058","country_code":"PT","type":"education","lineage":["https://openalex.org/I4210106265"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Yuriy Perezhohin","raw_affiliation_strings":["MyNorth AI Research, Oeiras, Portugal","MyNorth AI Research, Portugal"],"raw_orcid":"https://orcid.org/0009-0004-1046-7883","affiliations":[{"raw_affiliation_string":"MyNorth AI Research, Oeiras, Portugal","institution_ids":["https://openalex.org/I4210106265"]},{"raw_affiliation_string":"MyNorth AI Research, Portugal","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tiago Santos","orcid":"https://orcid.org/0009-0000-5616-4141"},"institutions":[{"id":"https://openalex.org/I4210106265","display_name":"Instituto Nacional de Administra\u00e7\u00e3o, I. P.","ror":"https://ror.org/01h394058","country_code":"PT","type":"education","lineage":["https://openalex.org/I4210106265"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Tiago Santos","raw_affiliation_strings":["MyNorth AI Research, Oeiras, Portugal","MyNorth AI Research, Portugal"],"raw_orcid":"https://orcid.org/0009-0000-5616-4141","affiliations":[{"raw_affiliation_string":"MyNorth AI Research, Oeiras, Portugal","institution_ids":["https://openalex.org/I4210106265"]},{"raw_affiliation_string":"MyNorth AI Research, Portugal","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094000405","display_name":"Victor Costa","orcid":null},"institutions":[{"id":"https://openalex.org/I4210106265","display_name":"Instituto Nacional de Administra\u00e7\u00e3o, I. P.","ror":"https://ror.org/01h394058","country_code":"PT","type":"education","lineage":["https://openalex.org/I4210106265"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Victor Costa","raw_affiliation_strings":["MyNorth AI Research, Oeiras, Portugal","MyNorth AI Research, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MyNorth AI Research, Oeiras, Portugal","institution_ids":["https://openalex.org/I4210106265"]},{"raw_affiliation_string":"MyNorth AI Research, Portugal","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054467954","display_name":"Fernando Peres","orcid":null},"institutions":[{"id":"https://openalex.org/I4210106265","display_name":"Instituto Nacional de Administra\u00e7\u00e3o, I. P.","ror":"https://ror.org/01h394058","country_code":"PT","type":"education","lineage":["https://openalex.org/I4210106265"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Fernando Peres","raw_affiliation_strings":["MyNorth AI Research, Oeiras, Portugal","MyNorth AI Research, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MyNorth AI Research, Oeiras, Portugal","institution_ids":["https://openalex.org/I4210106265"]},{"raw_affiliation_string":"MyNorth AI Research, Portugal","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087976149","display_name":"Mauro Castelli","orcid":"https://orcid.org/0000-0002-8793-1451"},"institutions":[{"id":"https://openalex.org/I83558840","display_name":"Universidade Nova de Lisboa","ror":"https://ror.org/02xankh89","country_code":"PT","type":"education","lineage":["https://openalex.org/I83558840"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Mauro Castelli","raw_affiliation_strings":["NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisbon, Portugal","NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisboa, Portugal"],"raw_orcid":"https://orcid.org/0000-0002-8793-1451","affiliations":[{"raw_affiliation_string":"NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisbon, Portugal","institution_ids":["https://openalex.org/I83558840"]},{"raw_affiliation_string":"NOVA Information Management School (NOVA IMS), Universidade NOVA de Lisboa, Campus de Campolide, Lisboa, Portugal","institution_ids":["https://openalex.org/I83558840"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":1.8328,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.87802428,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"12","issue":null,"first_page":"155136","last_page":"155150"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9602000117301941,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9602000117301941,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8436434268951416},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7644951939582825},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.6160334348678589},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46174412965774536},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.43079033493995667},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4274606704711914},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.41511720418930054},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.38079315423965454}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8436434268951416},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7644951939582825},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.6160334348678589},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46174412965774536},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.43079033493995667},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4274606704711914},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.41511720418930054},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.38079315423965454}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2024.3482970","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3482970","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:198166fd446848a89cad4d39bdb02b75","is_oa":true,"landing_page_url":"https://doaj.org/article/198166fd446848a89cad4d39bdb02b75","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 12, Pp 155136-155150 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2024.3482970","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3482970","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.4399999976158142,"id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G84549046","display_name":"Information Management Research Center","funder_award_id":"UIDB/04152/2020","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"}],"funders":[{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":94,"referenced_works":["https://openalex.org/W1554576613","https://openalex.org/W1666984270","https://openalex.org/W1773149199","https://openalex.org/W1877570817","https://openalex.org/W1970705707","https://openalex.org/W2012897754","https://openalex.org/W2023018568","https://openalex.org/W2052666245","https://openalex.org/W2064675550","https://openalex.org/W2082488947","https://openalex.org/W2090861223","https://openalex.org/W2101926813","https://openalex.org/W2105594594","https://openalex.org/W2127141656","https://openalex.org/W2128084896","https://openalex.org/W2136442326","https://openalex.org/W2137089646","https://openalex.org/W2142384583","https://openalex.org/W2143612262","https://openalex.org/W2147706354","https://openalex.org/W2148154194","https://openalex.org/W2151484683","https://openalex.org/W2155273149","https://openalex.org/W2157487986","https://openalex.org/W2163212981","https://openalex.org/W2165712214","https://openalex.org/W2167969117","https://openalex.org/W2168171912","https://openalex.org/W2250539671","https://openalex.org/W2425121537","https://openalex.org/W2593116425","https://openalex.org/W2842511635","https://openalex.org/W2882319491","https://openalex.org/W2888156139","https://openalex.org/W2912581782","https://openalex.org/W2914699769","https://openalex.org/W2930959424","https://openalex.org/W2962719052","https://openalex.org/W2965373594","https://openalex.org/W2970641574","https://openalex.org/W3015449694","https://openalex.org/W3015654635","https://openalex.org/W3025787433","https://openalex.org/W3035725276","https://openalex.org/W3095410713","https://openalex.org/W3127686677","https://openalex.org/W3162391496","https://openalex.org/W3176445421","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W4210327373","https://openalex.org/W4284898017","https://openalex.org/W4300672471","https://openalex.org/W4361199948","https://openalex.org/W4372260310","https://openalex.org/W4372266552","https://openalex.org/W4385245566","https://openalex.org/W4385572536","https://openalex.org/W4386058787","https://openalex.org/W4389600306","https://openalex.org/W4392487628","https://openalex.org/W4392931320","https://openalex.org/W4399265137","https://openalex.org/W6606244218","https://openalex.org/W6636510571","https://openalex.org/W6639102338","https://openalex.org/W6675751002","https://openalex.org/W6679434410","https://openalex.org/W6730323794","https://openalex.org/W6750665317","https://openalex.org/W6754278344","https://openalex.org/W6755207826","https://openalex.org/W6768021236","https://openalex.org/W6769627184","https://openalex.org/W6771467084","https://openalex.org/W6779068807","https://openalex.org/W6780218876","https://openalex.org/W6784011672","https://openalex.org/W6788705105","https://openalex.org/W6791353385","https://openalex.org/W6791429434","https://openalex.org/W6793728465","https://openalex.org/W6793736971","https://openalex.org/W6802465204","https://openalex.org/W6803092890","https://openalex.org/W6838929754","https://openalex.org/W6843026064","https://openalex.org/W6847363464","https://openalex.org/W6851663557","https://openalex.org/W6852653005","https://openalex.org/W6852934099","https://openalex.org/W6860303509","https://openalex.org/W6862270480","https://openalex.org/W6873032455"],"related_works":["https://openalex.org/W2541680182","https://openalex.org/W1911859126","https://openalex.org/W2131711534","https://openalex.org/W2559040841","https://openalex.org/W114661351","https://openalex.org/W642007152","https://openalex.org/W46679383","https://openalex.org/W2056066842","https://openalex.org/W2184127972","https://openalex.org/W2343205865"],"abstract_inverted_index":{"This":[0,141],"paper":[1],"presents":[2],"a":[3,47,81,88,92],"novel":[4],"methodology":[5,79],"for":[6],"enhancing":[7],"Automatic":[8],"Speech":[9],"Recognition":[10],"(ASR)":[11],"performance":[12,168],"by":[13,97],"utilizing":[14],"contrastive":[15,48],"learning":[16,49],"to":[17,51,153,197,212],"filter":[18],"synthetic":[19,28,55,148],"audio":[20,56],"data.":[21],"We":[22,76,173],"address":[23],"the":[24,63,78,105,144,176,191,204,209],"challenge":[25],"of":[26,54,67,146,183],"incorporating":[27],"data":[29,39,149],"into":[30],"ASR":[31,167],"training,":[32],"especially":[33,137],"in":[34],"scenarios":[35],"with":[36],"limited":[37],"real-world":[38],"or":[40],"unique":[41,98],"linguistic":[42],"characteristics.":[43,116],"The":[44,160],"method":[45],"utilizes":[46],"model":[50,112,155],"align":[52,73],"representations":[53],"and":[57,65,91,114,151,157,164,193,208],"its":[58],"corresponding":[59],"text":[60],"transcripts,":[61],"enabling":[62],"identification":[64],"removal":[66],"low-quality":[68],"samples":[69],"that":[70,104],"do":[71],"not":[72,132],"well":[74,189],"semantically.":[75],"evaluate":[77],"on":[80,110,138],"medium-resource":[82],"language":[83,171],"across":[84,169],"two":[85],"distinct":[86],"datasets:":[87],"general-domain":[89],"dataset":[90,95,115],"regionally":[93],"specific":[94,154],"characterized":[96],"pronunciation":[99],"patterns.":[100],"Experimental":[101],"results":[102],"reveal":[103],"optimal":[106],"filtering":[107,152],"strategy":[108],"depends":[109],"both":[111],"capacity":[113],"Larger":[117],"models,":[118],"like":[119],"Whisper":[120,206],"Large":[121],"V3,":[122],"particularly":[123],"benefit":[124],"from":[125],"aggressive":[126],"filtering,":[127,136],"while":[128],"smaller":[129],"models":[130,207],"may":[131],"require":[133],"such":[134],"stringent":[135],"non-normalized":[139],"text.":[140],"work":[142],"highlights":[143],"importance":[145],"adjusting":[147],"augmentation":[150],"architectures":[156],"target":[158],"domains.":[159],"proposed":[161],"method,":[162],"robust":[163],"adaptable,":[165],"enhances":[166],"diverse":[170],"settings.":[172],"have":[174],"open-sourced":[175],"entire":[177],"work,":[178],"which":[179],"includes":[180],"140":[181],"hours":[182],"synthetically":[184],"generated":[185],"Portuguese":[186],"speech,":[187],"as":[188,190],"pipeline":[192],"parameter":[194],"settings":[195],"used":[196],"create":[198],"these":[199],"samples.":[200],"Additionally,":[201],"we":[202],"provide":[203],"fine-tuned":[205],"code":[210,217],"required":[211],"reproduce":[213],"this":[214],"research.":[215],"Our":[216],"will":[218],"be":[219],"available":[220],"at":[221],"<uri":[222],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[223],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/my-north-ai/semantic_audio_filtering</uri>.":[224]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
