{"id":"https://openalex.org/W4372260078","doi":"https://doi.org/10.1109/icassp49357.2023.10096184","title":"Less Is More: A Unified Architecture for Device-Directed Speech Detection with Multiple Invocation Types","display_name":"Less Is More: A Unified Architecture for Device-Directed Speech Detection with Multiple Invocation Types","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372260078","doi":"https://doi.org/10.1109/icassp49357.2023.10096184"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096184","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096184","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076868683","display_name":"Oggi Rudovic","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Oggi Rudovic","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038745666","display_name":"Wonil Chang","orcid":"https://orcid.org/0000-0002-4457-4917"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Wonil Chang","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083789644","display_name":"Vineet Garg","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Vineet Garg","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063425263","display_name":"Pranay Dighe","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Pranay Dighe","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006330185","display_name":"Pramod Simha","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Pramod Simha","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090461939","display_name":"Jack Berkowitz","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jack Berkowitz","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101807751","display_name":"Ahmed Hussen Abdelaziz","orcid":"https://orcid.org/0000-0001-8027-4666"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ahmed H. Abdelaziz","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065814281","display_name":"Sachin Kajarekar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Sachin Kajarekar","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009327522","display_name":"Erik Marchi","orcid":"https://orcid.org/0000-0002-5335-6356"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Erik Marchi","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068392026","display_name":"Saurabh Adya","orcid":"https://orcid.org/0009-0000-4533-6577"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Saurabh Adya","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5076868683"],"corresponding_institution_ids":["https://openalex.org/I4210107260"],"apc_list":null,"apc_paid":null,"fwci":0.4075,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.5399061,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/invocation","display_name":"Invocation","score":0.8042920231819153},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8039978742599487}],"concepts":[{"id":"https://openalex.org/C2776527387","wikidata":"https://www.wikidata.org/wiki/Q1671839","display_name":"Invocation","level":2,"score":0.8042920231819153},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8039978742599487},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096184","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096184","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7300000190734863,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W2064675550","https://openalex.org/W2407023693","https://openalex.org/W2783652918","https://openalex.org/W2797759721","https://openalex.org/W2887080793","https://openalex.org/W2891267443","https://openalex.org/W2891722048","https://openalex.org/W3011404287","https://openalex.org/W3015574151","https://openalex.org/W3161038873","https://openalex.org/W3163423118","https://openalex.org/W3198850376","https://openalex.org/W3206716742","https://openalex.org/W4213154531","https://openalex.org/W4297841227","https://openalex.org/W4297841617","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6754747464"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4255072433","https://openalex.org/W2527536206","https://openalex.org/W4247753279","https://openalex.org/W2351971922","https://openalex.org/W2049133474","https://openalex.org/W1571558219","https://openalex.org/W2378346884","https://openalex.org/W2383416687"],"abstract_inverted_index":{"Suppressing":[0],"unintended":[1],"invocation":[2,36,72,101,117,183],"of":[3,7,34,71,85,149,175,195,205,246],"the":[4,8,38,59,63,69,90,147,150,156,161,169,193,196,202,247],"device":[5],"because":[6],"speech":[9],"that":[10,159,174,251],"sounds":[11],"like":[12],"wake-word,":[13],"or":[14,48],"accidental":[15],"button":[16],"presses,":[17],"is":[18,26,43,75,92,126,140],"critical":[19],"for":[20,52,62,77,113,155,180,230],"a":[21,49,108,129,134,176,181,227,255],"good":[22],"user":[23],"experience,":[24],"and":[25,80,211,233,243,249],"referred":[27],"to":[28,41,44,94,173,259],"as":[29],"False-Trigger-Mitigation":[30],"(FTM).":[31],"In":[32,146,199],"case":[33,204],"multiple":[35,116],"options,":[37,73],"traditional":[39,256],"approach":[40,65,258],"FTM":[42,114,151],"use":[45],"invocation-specific":[46,143,197],"models,":[47],"single":[50,182],"model":[51,139,163,170,178,229],"all":[53],"invocations.":[54],"Both":[55],"approaches":[56],"are":[57,119],"sub-optimal:":[58],"memory":[60],"cost":[61],"former":[64],"grows":[66],"linearly":[67],"with":[68,142,226],"number":[70],"which":[74],"prohibitive":[76],"on-device":[78],"deployment,":[79],"does":[81],"not":[82,187],"take":[83],"advantage":[84],"shared":[86,162],"training":[87],"data;":[88],"while":[89],"latter":[91],"unable":[93],"accurately":[95],"capture":[96],"acoustic":[97,137],"differences":[98],"across":[99,165],"different":[100],"types.":[102],"To":[103],"this":[104],"end,":[105],"we":[106,153,185,208,240],"propose":[107,241],"Unified":[109],"Acoustic":[110],"Detector":[111],"(UAD)":[112],"when":[115,224],"options":[118],"available":[120],"on":[121],"device.":[122],"The":[123],"proposed":[124],"UAD":[125],"trained":[127,136],"using":[128,160],"multi-task":[130],"learning":[131],"framework,":[132],"where":[133],"jointly":[135],"encoder":[138],"augmented":[141],"classification":[144],"layers.":[145],"context":[148],"task,":[152],"show":[154,250],"first":[157],"time":[158],"architecture":[164],"invocations":[166],"(thus,":[167],"keeping":[168],"size":[171],"similar":[172],"monolithic":[177],"used":[179],"type),":[184],"can":[186],"only":[188],"match":[189],"but":[190],"largely":[191],"improve":[192],"accuracy":[194],"models.":[198],"particular,":[200],"in":[201,215],"challenging":[203],"touch-based":[206],"invocation,":[207,237],"obtain":[209],"50%":[210],"35%":[212],"relative":[213],"improvement":[214],"false":[216],"positive":[217,222],"rate":[218],"at":[219],"99%":[220],"true":[221],"rate,":[223],"compared":[225],"singleoutput":[228],"both":[231,253],"invocations,":[232],"separate":[234],"models":[235],"per":[236],"respectively.":[238],"Furthermore,":[239],"streaming":[242],"non-streaming":[244],"variants":[245],"UAD,":[248],"they":[252],"outperform":[254],"ASR-based":[257],"FTM.":[260]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
