{"id":"https://openalex.org/W4392904733","doi":"https://doi.org/10.1109/icassp48485.2024.10446931","title":"Unsupervised Multi-Domain Data Selection for Asr Fine-Tuning","display_name":"Unsupervised Multi-Domain Data Selection for Asr Fine-Tuning","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392904733","doi":"https://doi.org/10.1109/icassp48485.2024.10446931"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446931","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446931","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046443394","display_name":"Nikolaos Lagos","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nikolaos Lagos","raw_affiliation_strings":["Naver Labs Europe,France","Naver Labs Europe, France"],"affiliations":[{"raw_affiliation_string":"Naver Labs Europe,France","institution_ids":[]},{"raw_affiliation_string":"Naver Labs Europe, France","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034647649","display_name":"Ioan Calapodescu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ioan Calapodescu","raw_affiliation_strings":["Naver Labs Europe,France","Naver Labs Europe, France"],"affiliations":[{"raw_affiliation_string":"Naver Labs Europe,France","institution_ids":[]},{"raw_affiliation_string":"Naver Labs Europe, France","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5046443394"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7252,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.73063208,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"10711","last_page":"10715"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.847618818283081},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.6619710922241211},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6178402900695801},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5941440463066101},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.5788060426712036},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5510445833206177},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.543445885181427},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4995300769805908},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4312562346458435},{"id":"https://openalex.org/keywords/external-data-representation","display_name":"External Data Representation","score":0.41666245460510254},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4152199923992157},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.36462557315826416},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.35459935665130615}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.847618818283081},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.6619710922241211},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6178402900695801},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5941440463066101},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.5788060426712036},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5510445833206177},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.543445885181427},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4995300769805908},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4312562346458435},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.41666245460510254},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4152199923992157},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36462557315826416},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.35459935665130615},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446931","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446931","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2914304175","https://openalex.org/W2963683295","https://openalex.org/W2970641574","https://openalex.org/W2981720610","https://openalex.org/W2998702515","https://openalex.org/W3114610051","https://openalex.org/W3163169798","https://openalex.org/W3167533889","https://openalex.org/W3209059054","https://openalex.org/W4224939570","https://openalex.org/W4284966089","https://openalex.org/W4287854494","https://openalex.org/W4307322847","https://openalex.org/W4319586625","https://openalex.org/W4372341252","https://openalex.org/W4372341722","https://openalex.org/W4385822247","https://openalex.org/W6780218876","https://openalex.org/W6810300553","https://openalex.org/W6846004400"],"related_works":["https://openalex.org/W1569283511","https://openalex.org/W4236193183","https://openalex.org/W2053866214","https://openalex.org/W2607505004","https://openalex.org/W2231795205","https://openalex.org/W2944691285","https://openalex.org/W3144173820","https://openalex.org/W3045896262","https://openalex.org/W4214728004","https://openalex.org/W2799180539"],"abstract_inverted_index":{"Fine-tuning":[0],"can":[1,39],"be":[2,40],"used":[3],"to":[4,11,58,186,195],"adapt":[5],"an":[6,177],"Automatic":[7],"Speech":[8],"Recognition":[9],"system":[10],"a":[12,54,65,72,79,97,108,142,148],"new":[13],"domain,":[14],"based":[15,77],"on":[16,78,192],"some":[17],"transcribed":[18],"data":[19,34,63,70,91,114,170],"from":[20,64,139],"the":[21,27,30,82,180],"target-domain.":[22],"However,":[23],"in":[24,125,151],"real-world":[25],"settings,":[26],"availability":[28],"and":[29,110,121,135,156,158],"amount":[31],"of":[32,43,68,74,81,100,104,113,131,146,153,179,184],"target-domain":[33],"needed":[35],"for":[36,94,168],"this":[37,86,162,173],"fine-tuning":[38],"limited,":[41],"because":[42],"budget":[44,99],"constraints":[45],"or":[46],"other":[47],"reasons":[48],"like":[49],"privacy.":[50],"In":[51,85],"such":[52],"cases,":[53],"possible":[55],"approach":[56],"is":[57,150],"automatically":[59],"select":[60],"candidate":[61],"training":[62,105],"pre-existing":[66],"pool":[67,112],"audio":[69],"(e.g.":[71],"mix":[73],"open-source":[75],"datasets),":[76],"sample":[80],"target":[83],"domain.":[84],"paper":[87],"we":[88,175],"investigate":[89],"unsupervised":[90],"selection":[92],"techniques":[93],"fine-tuning,":[95],"under":[96],"limited":[98],"only":[101],"one":[102],"hour":[103],"data,":[106],"using":[107,161],"multi-source":[109],"multi-domain":[111],"(7":[115],"datasets,":[116],"6k":[117],"hours,":[118],"various":[119],"genres":[120],"styles).Our":[122],"method":[123,174],"consists":[124],"1.":[126],"extracting":[127],"self-supervised":[128],"model":[129],"representations":[130,141],"both":[132],"modalities":[133],"(text":[134],"audio)":[136],"2.":[137],"learning":[138],"these":[140],"domain-calibrated":[143],"vector":[144],"representation":[145,163],"what":[147],"domain":[149],"terms":[152],"origin,":[154],"genre":[155],"style":[157],"finally":[159],"3.":[160],"with":[164],"k-nearest":[165],"neighbor":[166],"search":[167],"automatic":[169],"selection.":[171],"Using":[172],"observe":[176],"improvement":[178],"Word":[181],"Error":[182],"Rate":[183],"up":[185],"about":[187],"13%":[188],"(3.2":[189],"WER":[190],"points)":[191],"average,":[193],"compared":[194],"our":[196],"baselines.":[197]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
