{"id":"https://openalex.org/W4392903879","doi":"https://doi.org/10.1109/icassp48485.2024.10446326","title":"Prioritizing Data Acquisition for end-to-end Speech Model Improvement","display_name":"Prioritizing Data Acquisition for end-to-end Speech Model Improvement","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903879","doi":"https://doi.org/10.1109/icassp48485.2024.10446326"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446326","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446326","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083365850","display_name":"Alkis Koudounas","orcid":"https://orcid.org/0000-0003-4386-0409"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Polytechnic University of Turin","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Alkis Koudounas","raw_affiliation_strings":["Politecnico di Torino,Turin,Italy","Politecnico di Torino, Turin, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Torino,Turin,Italy","institution_ids":["https://openalex.org/I177477856"]},{"raw_affiliation_string":"Politecnico di Torino, Turin, Italy","institution_ids":["https://openalex.org/I177477856"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057352420","display_name":"Eliana Pastor","orcid":"https://orcid.org/0000-0002-3664-4137"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Polytechnic University of Turin","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Eliana Pastor","raw_affiliation_strings":["Politecnico di Torino,Turin,Italy","Politecnico di Torino, Turin, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Torino,Turin,Italy","institution_ids":["https://openalex.org/I177477856"]},{"raw_affiliation_string":"Politecnico di Torino, Turin, Italy","institution_ids":["https://openalex.org/I177477856"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001986834","display_name":"Giuseppe Attanasio","orcid":"https://orcid.org/0000-0001-6945-3698"},"institutions":[{"id":"https://openalex.org/I71209653","display_name":"Bocconi University","ror":"https://ror.org/05crjpb27","country_code":"IT","type":"education","lineage":["https://openalex.org/I71209653"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Giuseppe Attanasio","raw_affiliation_strings":["Bocconi University,Milan,Italy","Bocconi University, Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Bocconi University,Milan,Italy","institution_ids":["https://openalex.org/I71209653"]},{"raw_affiliation_string":"Bocconi University, Milan, Italy","institution_ids":["https://openalex.org/I71209653"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050777140","display_name":"Luca de Alfaro","orcid":"https://orcid.org/0000-0003-3856-4576"},"institutions":[{"id":"https://openalex.org/I185103710","display_name":"University of California, Santa Cruz","ror":"https://ror.org/03s65by71","country_code":"US","type":"education","lineage":["https://openalex.org/I185103710"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Luca de Alfaro","raw_affiliation_strings":["University of California,Santa Cruz,CA,USA","University of California, Santa Cruz, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California,Santa Cruz,CA,USA","institution_ids":["https://openalex.org/I185103710"]},{"raw_affiliation_string":"University of California, Santa Cruz, CA, USA","institution_ids":["https://openalex.org/I185103710"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001695309","display_name":"Elena Baralis","orcid":"https://orcid.org/0000-0001-9231-467X"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Polytechnic University of Turin","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Elena Baralis","raw_affiliation_strings":["Politecnico di Torino,Turin,Italy","Politecnico di Torino, Turin, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Torino,Turin,Italy","institution_ids":["https://openalex.org/I177477856"]},{"raw_affiliation_string":"Politecnico di Torino, Turin, Italy","institution_ids":["https://openalex.org/I177477856"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5083365850"],"corresponding_institution_ids":["https://openalex.org/I177477856"],"apc_list":null,"apc_paid":null,"fwci":1.3781,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.82907184,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"3","issue":null,"first_page":"7000","last_page":"7004"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7880327105522156},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6766535043716431},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6184877157211304},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5986382961273193},{"id":"https://openalex.org/keywords/data-acquisition","display_name":"Data acquisition","score":0.5279081463813782},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5245599746704102},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5079618096351624},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48167335987091064},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.47134870290756226},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.45661303400993347},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4490641951560974},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.43576669692993164},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.4248208999633789},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.36504411697387695},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.19049155712127686},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.12369728088378906},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08174872398376465}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7880327105522156},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6766535043716431},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6184877157211304},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5986382961273193},{"id":"https://openalex.org/C163985040","wikidata":"https://www.wikidata.org/wiki/Q1172399","display_name":"Data acquisition","level":2,"score":0.5279081463813782},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5245599746704102},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5079618096351624},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48167335987091064},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.47134870290756226},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.45661303400993347},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4490641951560974},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.43576669692993164},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.4248208999633789},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36504411697387695},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.19049155712127686},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.12369728088378906},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08174872398376465},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446326","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10446326","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1778956461","display_name":"Incorporating Demographic Factors into Natural Language Processing Models","funder_award_id":"949944","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G2020622295","display_name":null,"funder_award_id":"Big Data","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G3650733657","display_name":null,"funder_award_id":"NextGenerationEU","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4508289328","display_name":null,"funder_award_id":"PE00000013","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4956428346","display_name":null,"funder_award_id":"Horizon 2020 research and innovatio","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G7248538987","display_name":null,"funder_award_id":"1555 11/10/2022","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8318064016","display_name":null,"funder_award_id":"Horizon","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8893660128","display_name":null,"funder_award_id":"PE0000001","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320334678","display_name":"European Research Council","ror":"https://ror.org/0472cxd90"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W2119088781","https://openalex.org/W2807251972","https://openalex.org/W2896331720","https://openalex.org/W2972584841","https://openalex.org/W2979826702","https://openalex.org/W3036601975","https://openalex.org/W3171842281","https://openalex.org/W3173173856","https://openalex.org/W3197613133","https://openalex.org/W3213029956","https://openalex.org/W4296069263","https://openalex.org/W4372346208","https://openalex.org/W4385270096","https://openalex.org/W4385823123","https://openalex.org/W6677766302","https://openalex.org/W6751917733","https://openalex.org/W6780218876"],"related_works":["https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W2352448290","https://openalex.org/W2380820513","https://openalex.org/W3081133439","https://openalex.org/W2945537679","https://openalex.org/W4386246791","https://openalex.org/W2133103607","https://openalex.org/W3211701140","https://openalex.org/W2952280724"],"abstract_inverted_index":{"As":[0],"speech":[1,130],"processing":[2],"moves":[3],"toward":[4],"more":[5,29],"data-hungry":[6],"models,":[7],"data":[8,36,45,54],"selection":[9],"and":[10,56,64,140,154],"acquisition":[11,46,153],"become":[12],"crucial":[13],"to":[14,67,92,115],"building":[15],"better":[16,50,148],"systems.":[17],"Recent":[18],"efforts":[19],"have":[20],"championed":[21],"quantity":[22],"over":[23],"quality,":[24],"following":[25],"the":[26,31,38,79,84,97,107],"mantra":[27],"\"The":[28],"data,":[30],"better.\"":[32],"However,":[33],"not":[34],"every":[35],"brings":[37],"same":[39],"benefit.":[40],"This":[41],"paper":[42],"proposes":[43],"a":[44,60,62,71],"solution":[47],"that":[48,95,143],"yields":[49],"models":[51,131],"with":[52,73,127],"less":[53],"\u2013":[55],"lower":[57],"cost.":[58],"Given":[59],"model,":[61],"task,":[63],"an":[65],"objective":[66,99],"maximize,":[68],"we":[69,77,87,110,118],"propose":[70],"process":[72],"three":[74],"steps.":[75],"First,":[76],"assess":[78],"model\u2019s":[80],"baseline":[81],"performance":[82],"on":[83],"task.":[85],"Second,":[86],"use":[88],"efficient":[89],"mining":[90],"techniques":[91],"identify":[93],"subgroups":[94,108],"maximize":[96],"target":[98],"if":[100],"acquired":[101],"first":[102],"as":[103],"new":[104],"samples.":[105],"Being":[106],"interpretable,":[109],"can":[111],"determine":[112],"which":[113],"samples":[114],"acquire.":[116],"Third,":[117],"run":[119],"incremental":[120],"training":[121],"sampling":[122],"from":[123],"those":[124],"subgroups.":[125],"Experiments":[126],"two":[128,136],"state-of-the-art":[129],"for":[132],"Intent":[133],"Classification":[134],"across":[135],"datasets":[137],"in":[138],"English":[139],"Italian":[141],"show":[142],"our":[144],"method":[145],"is":[146],"significantly":[147],"than":[149],"random":[150],"or":[151],"complete":[152],"clustering-based":[155],"techniques.":[156]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
