{"id":"https://openalex.org/W4392903329","doi":"https://doi.org/10.1109/icassp48485.2024.10447652","title":"Phoneme-Aware Encoding for Prefix-Tree-Based Contextual ASR","display_name":"Phoneme-Aware Encoding for Prefix-Tree-Based Contextual ASR","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903329","doi":"https://doi.org/10.1109/icassp48485.2024.10447652"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447652","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447652","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034216052","display_name":"Hayato Futami","orcid":null},"institutions":[{"id":"https://openalex.org/I2800278093","display_name":"Sony Corporation (United States)","ror":"https://ror.org/05k91zb11","country_code":"US","type":"company","lineage":["https://openalex.org/I2800278093"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hayato Futami","raw_affiliation_strings":["Sony Group Corporation"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation","institution_ids":["https://openalex.org/I2800278093"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011937407","display_name":"Emiru Tsunoo","orcid":null},"institutions":[{"id":"https://openalex.org/I2800278093","display_name":"Sony Corporation (United States)","ror":"https://ror.org/05k91zb11","country_code":"US","type":"company","lineage":["https://openalex.org/I2800278093"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Emiru Tsunoo","raw_affiliation_strings":["Sony Group Corporation"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation","institution_ids":["https://openalex.org/I2800278093"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109644502","display_name":"Yosuke Kashiwagi","orcid":null},"institutions":[{"id":"https://openalex.org/I2800278093","display_name":"Sony Corporation (United States)","ror":"https://ror.org/05k91zb11","country_code":"US","type":"company","lineage":["https://openalex.org/I2800278093"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yosuke Kashiwagi","raw_affiliation_strings":["Sony Group Corporation"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation","institution_ids":["https://openalex.org/I2800278093"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110752097","display_name":"Hiroaki Ogawa","orcid":null},"institutions":[{"id":"https://openalex.org/I2800278093","display_name":"Sony Corporation (United States)","ror":"https://ror.org/05k91zb11","country_code":"US","type":"company","lineage":["https://openalex.org/I2800278093"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hiroaki Ogawa","raw_affiliation_strings":["Sony Group Corporation"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation","institution_ids":["https://openalex.org/I2800278093"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047892839","display_name":"Siddhant Arora","orcid":"https://orcid.org/0000-0003-0375-496X"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siddhant Arora","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5034216052"],"corresponding_institution_ids":["https://openalex.org/I2800278093"],"apc_list":null,"apc_paid":null,"fwci":2.7562,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.91003557,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"10641","last_page":"10645"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8390558958053589},{"id":"https://openalex.org/keywords/prefix","display_name":"Prefix","score":0.7919155359268188},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6665571928024292},{"id":"https://openalex.org/keywords/grapheme","display_name":"Grapheme","score":0.661819338798523},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.6562278866767883},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4683712422847748},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.45034828782081604},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44713258743286133},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4221900701522827},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10253980755805969}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8390558958053589},{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.7919155359268188},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6665571928024292},{"id":"https://openalex.org/C2776779415","wikidata":"https://www.wikidata.org/wiki/Q2545446","display_name":"Grapheme","level":3,"score":0.661819338798523},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.6562278866767883},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4683712422847748},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.45034828782081604},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44713258743286133},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4221900701522827},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10253980755805969},{"id":"https://openalex.org/C30080830","wikidata":"https://www.wikidata.org/wiki/Q169917","display_name":"Graphene","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447652","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447652","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2090755665","https://openalex.org/W2144499799","https://openalex.org/W2262393948","https://openalex.org/W2526425061","https://openalex.org/W2538246917","https://openalex.org/W2606974598","https://openalex.org/W2886319145","https://openalex.org/W2937402758","https://openalex.org/W2962780374","https://openalex.org/W2962784628","https://openalex.org/W2972625221","https://openalex.org/W2973172693","https://openalex.org/W3015995734","https://openalex.org/W3097794466","https://openalex.org/W3140235797","https://openalex.org/W3163462603","https://openalex.org/W3198004110","https://openalex.org/W4224918838","https://openalex.org/W4226462878","https://openalex.org/W4297841830","https://openalex.org/W4311000453","https://openalex.org/W4372348570","https://openalex.org/W4385822953","https://openalex.org/W4388017359","https://openalex.org/W4394862666","https://openalex.org/W6623517193","https://openalex.org/W6677499955","https://openalex.org/W6726873649","https://openalex.org/W6779469704","https://openalex.org/W6847363464","https://openalex.org/W6853406042"],"related_works":["https://openalex.org/W2506515307","https://openalex.org/W2060656088","https://openalex.org/W2142481367","https://openalex.org/W4385893898","https://openalex.org/W3196321793","https://openalex.org/W3080705045","https://openalex.org/W4285757703","https://openalex.org/W4392903329","https://openalex.org/W2129146436","https://openalex.org/W2032507829"],"abstract_inverted_index":{"In":[0],"speech":[1],"recognition":[2],"applications,":[3],"it":[4,46],"is":[5],"important":[6],"to":[7,50],"recognize":[8,52],"context-specific":[9],"rare":[10],"words,":[11],"such":[12,29],"as":[13,62],"proper":[14],"nouns.":[15],"Tree-constrained":[16],"Pointer":[17],"Generator":[18],"(TCPGen)":[19],"has":[20],"shown":[21],"promise":[22],"for":[23,105],"this":[24],"purpose,":[25],"which":[26],"efficiently":[27],"biases":[28],"words":[30,53,61],"with":[31,47,103],"a":[32],"prefix":[33],"tree.":[34],"While":[35],"the":[36,92,96,120,128],"original":[37],"TCPGen":[38,58,89,104],"relies":[39],"on":[40,118],"grapheme-based":[41,116],"encoding,":[42],"we":[43,65,79],"propose":[44,66,80],"extending":[45],"phoneme-aware":[48,69,97,112],"encoding":[49,70,113,117],"better":[51,94],"of":[54,88,130],"unusual":[55],"pronunciations.":[56],"As":[57],"handles":[59],"biasing":[60],"subword":[63],"units,":[64],"obtaining":[67],"subword-level":[68],"by":[71],"using":[72],"alignment":[73],"between":[74],"phonemes":[75],"and":[76,123],"subwords.":[77],"Furthermore,":[78],"injecting":[81],"phoneme-level":[82],"predictions":[83],"from":[84],"CTC":[85],"into":[86],"queries":[87],"so":[90],"that":[91,110],"model":[93],"interprets":[95],"encodings.":[98],"We":[99,108],"conducted":[100],"ASR":[101],"experiments":[102],"RNN":[106],"transducer.":[107],"observed":[109],"proposed":[111],"outperformed":[114],"ordinary":[115],"both":[119],"English":[121],"LibriSpeech":[122],"Japanese":[124],"CSJ":[125],"datasets,":[126],"demonstrating":[127],"robustness":[129],"our":[131],"approach":[132],"across":[133],"linguistically":[134],"diverse":[135],"languages.":[136]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
