{"id":"https://openalex.org/W4411934141","doi":"https://doi.org/10.1162/tacl_a_00759","title":"A Comparative Approach for Auditing Multilingual Phonetic Transcript Archives","display_name":"A Comparative Approach for Auditing Multilingual Phonetic Transcript Archives","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4411934141","doi":"https://doi.org/10.1162/tacl_a_00759"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00759","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00759","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00759/2534926/tacl_a_00759.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00759/2534926/tacl_a_00759.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086887575","display_name":"Farhan Samir","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127509","display_name":"Vector Institute","ror":"https://ror.org/03kqdja62","country_code":"CA","type":"facility","lineage":["https://openalex.org/I4210127509"]},{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Farhan Samir","raw_affiliation_strings":["University of British Columbia, Canada. fsamir@mail.ubc.ca","Vector Institute for AI, Canada"],"affiliations":[{"raw_affiliation_string":"University of British Columbia, Canada. fsamir@mail.ubc.ca","institution_ids":["https://openalex.org/I141945490"]},{"raw_affiliation_string":"Vector Institute for AI, Canada","institution_ids":["https://openalex.org/I4210127509"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090594863","display_name":"Emily P. Ahn","orcid":null},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Emily P. Ahn","raw_affiliation_strings":["University of Washington, USA"],"affiliations":[{"raw_affiliation_string":"University of Washington, USA","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041256652","display_name":"Shreya Prakash","orcid":null},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shreya Prakash","raw_affiliation_strings":["University of Washington, USA"],"affiliations":[{"raw_affiliation_string":"University of Washington, USA","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023614533","display_name":"M\u00e1rton S\u00f3skuthy","orcid":"https://orcid.org/0000-0002-5074-4767"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"M\u00e1rton Soskuthy","raw_affiliation_strings":["University of British Columbia, Canada"],"affiliations":[{"raw_affiliation_string":"University of British Columbia, Canada","institution_ids":["https://openalex.org/I141945490"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006531172","display_name":"Vered Shwartz","orcid":"https://orcid.org/0000-0002-1151-4379"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]},{"id":"https://openalex.org/I4210127509","display_name":"Vector Institute","ror":"https://ror.org/03kqdja62","country_code":"CA","type":"facility","lineage":["https://openalex.org/I4210127509"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Vered Shwartz","raw_affiliation_strings":["University of British Columbia, Canada","Vector Institute for AI, Canada"],"affiliations":[{"raw_affiliation_string":"University of British Columbia, Canada","institution_ids":["https://openalex.org/I141945490"]},{"raw_affiliation_string":"Vector Institute for AI, Canada","institution_ids":["https://openalex.org/I4210127509"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114985981","display_name":"Jian Zhu","orcid":"https://orcid.org/0000-0003-1000-1990"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jian Zhu","raw_affiliation_strings":["University of British Columbia, Canada"],"affiliations":[{"raw_affiliation_string":"University of British Columbia, Canada","institution_ids":["https://openalex.org/I141945490"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5086887575"],"corresponding_institution_ids":["https://openalex.org/I141945490","https://openalex.org/I4210127509"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07492177,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":null,"first_page":"595","last_page":"612"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13996","display_name":"Diverse Musicological Studies","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1210","display_name":"Music"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7864186763763428},{"id":"https://openalex.org/keywords/audit","display_name":"Audit","score":0.6393484473228455},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46485862135887146},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.38825079798698425},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.38016730546951294},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.36400139331817627},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3479721248149872},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3294486105442047},{"id":"https://openalex.org/keywords/accounting","display_name":"Accounting","score":0.2088136374950409},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.06077072024345398}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7864186763763428},{"id":"https://openalex.org/C199521495","wikidata":"https://www.wikidata.org/wiki/Q181487","display_name":"Audit","level":2,"score":0.6393484473228455},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46485862135887146},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.38825079798698425},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.38016730546951294},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.36400139331817627},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3479721248149872},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3294486105442047},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.2088136374950409},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.06077072024345398},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1162/tacl_a_00759","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00759","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00759/2534926/tacl_a_00759.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00759","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00759","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00759/2534926/tacl_a_00759.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1304196372","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"},{"id":"https://openalex.org/G2165548363","display_name":null,"funder_award_id":"Canada","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"},{"id":"https://openalex.org/G3033334534","display_name":null,"funder_award_id":"AI Chair","funder_id":"https://openalex.org/F4320309949","funder_display_name":"Canadian Institute for Advanced Research"},{"id":"https://openalex.org/G5784215521","display_name":null,"funder_award_id":"Chair","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"},{"id":"https://openalex.org/G8801411701","display_name":null,"funder_award_id":"PGS-D","funder_id":"https://openalex.org/F4320334593","funder_display_name":"Natural Sciences and Engineering Research Council of Canada"}],"funders":[{"id":"https://openalex.org/F4320309949","display_name":"Canadian Institute for Advanced Research","ror":"https://ror.org/01sdtdd95"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411934141.pdf","grobid_xml":"https://content.openalex.org/works/W4411934141.grobid-xml"},"referenced_works_count":57,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W2013784666","https://openalex.org/W2051576146","https://openalex.org/W2074231493","https://openalex.org/W2126055901","https://openalex.org/W2128619084","https://openalex.org/W2612690371","https://openalex.org/W2798935874","https://openalex.org/W2887428522","https://openalex.org/W2914304175","https://openalex.org/W3003646990","https://openalex.org/W3015877095","https://openalex.org/W3035051527","https://openalex.org/W3035111735","https://openalex.org/W3100501376","https://openalex.org/W3119308075","https://openalex.org/W3133702157","https://openalex.org/W3137010024","https://openalex.org/W3144247233","https://openalex.org/W3189849087","https://openalex.org/W3198105115","https://openalex.org/W3206010442","https://openalex.org/W3207830467","https://openalex.org/W3213029956","https://openalex.org/W3213241618","https://openalex.org/W3215385857","https://openalex.org/W4205801577","https://openalex.org/W4211190719","https://openalex.org/W4224629880","https://openalex.org/W4247156940","https://openalex.org/W4319862635","https://openalex.org/W4322714819","https://openalex.org/W4385571124","https://openalex.org/W4385822546","https://openalex.org/W4385822973","https://openalex.org/W4386644478","https://openalex.org/W4388717490","https://openalex.org/W4392903765","https://openalex.org/W4393321314","https://openalex.org/W4393849379","https://openalex.org/W4401042860","https://openalex.org/W4401043238","https://openalex.org/W4402111462","https://openalex.org/W4402112192","https://openalex.org/W4403681535","https://openalex.org/W4404782765","https://openalex.org/W4408355279","https://openalex.org/W6731852758","https://openalex.org/W6739585900","https://openalex.org/W6778690528","https://openalex.org/W6788798556","https://openalex.org/W6804095016","https://openalex.org/W6810220367","https://openalex.org/W6847363464","https://openalex.org/W6856193217","https://openalex.org/W6857810165","https://openalex.org/W6875838015"],"related_works":["https://openalex.org/W2404937507","https://openalex.org/W3121186197","https://openalex.org/W2373849942","https://openalex.org/W151161666","https://openalex.org/W2030757640","https://openalex.org/W2118407572","https://openalex.org/W2071653420","https://openalex.org/W2260291664","https://openalex.org/W2360464208","https://openalex.org/W4366384404"],"abstract_inverted_index":{"Abstract":[0],"Curating":[1],"datasets":[2],"that":[3,97],"span":[4],"multiple":[5],"languages":[6],"is":[7],"challenging.":[8],"To":[9],"make":[10],"the":[11,24,52,107],"collection":[12],"more":[13,20],"scalable,":[14],"researchers":[15],"often":[16],"incorporate":[17],"one":[18],"or":[19],"imperfect":[21],"classifiers":[22],"in":[23,38,82,125],"process,":[25],"like":[26],"language":[27,40,68,80],"identification":[28],"models.":[29],"These":[30],"models,":[31],"however,":[32],"are":[33,71],"prone":[34],"to":[35,73],"failure,":[36],"resulting":[37],"some":[39],"partitions":[41,81,101],"being":[42],"unreliable":[43,59],"for":[44,56,66,78,106,134],"downstream":[45,108],"tasks.":[46],"We":[47,95],"introduce":[48],"a":[49,67,83,118],"statistical":[50],"test,":[51],"Preference":[53],"Proportion":[54],"Test,":[55],"identifying":[57],"such":[58],"partitions.":[60],"By":[61],"annotating":[62],"only":[63],"20":[64],"samples":[65],"partition,":[69],"we":[70],"able":[72],"identify":[74],"systematic":[75],"transcription":[76,112],"errors":[77],"10":[79],"recent":[84],"large":[85],"multilingual":[86,136],"transcribed":[87],"audio":[88,137],"archive,":[89],"X-IPAPack":[90],"(Zhu":[91],"et":[92],"al.,":[93],"2024).":[94],"find":[96],"filtering":[98],"these":[99],"low-quality":[100],"out":[102],"when":[103],"training":[104],"models":[105],"task":[109],"of":[110],"phonetic":[111],"brings":[113],"substantial":[114],"benefits,":[115],"most":[116],"notably":[117],"25.7%":[119],"relative":[120],"improvement":[121],"on":[122],"transcribing":[123],"recordings":[124],"out-of-distribution":[126],"languages.":[127],"Our":[128],"work":[129],"contributes":[130],"an":[131],"effective":[132],"method":[133],"auditing":[135],"archives.1":[138]},"counts_by_year":[],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
