{"id":"https://openalex.org/W3001493895","doi":"https://doi.org/10.1109/icassp40776.2020.9053577","title":"Multi-Task Learning for Voice Trigger Detection","display_name":"Multi-Task Learning for Voice Trigger Detection","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3001493895","doi":"https://doi.org/10.1109/icassp40776.2020.9053577","mag":"3001493895"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053577","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053577","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2001.09519","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113798270","display_name":"Siddharth Sigtia","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Siddharth Sigtia","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013698887","display_name":"Pascal Clark","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Pascal Clark","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000034447","display_name":"Rob Haynes","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Rob Haynes","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005078804","display_name":"Hywel Richards","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Hywel Richards","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103410518","display_name":"John S. Bridle","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"John Bridle","raw_affiliation_strings":["Apple"],"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5113798270"],"corresponding_institution_ids":["https://openalex.org/I4210107260"],"apc_list":null,"apc_paid":null,"fwci":1.9199,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.8856972,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"28","issue":null,"first_page":"7449","last_page":"7453"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8291757106781006},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.772529125213623},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6333236694335938},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6294218897819519},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5752506256103516},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.5420078635215759},{"id":"https://openalex.org/keywords/active-listening","display_name":"Active listening","score":0.5398136973381042},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4691183865070343},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.46349501609802246},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4401475191116333},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4322260618209839},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.42290470004081726},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.2399415671825409}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8291757106781006},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.772529125213623},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6333236694335938},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6294218897819519},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5752506256103516},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.5420078635215759},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.5398136973381042},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4691183865070343},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.46349501609802246},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4401475191116333},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4322260618209839},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.42290470004081726},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2399415671825409},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053577","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053577","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2001.09519","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2001.09519","pdf_url":"https://arxiv.org/pdf/2001.09519","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2001.09519","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2001.09519","pdf_url":"https://arxiv.org/pdf/2001.09519","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6200000047683716,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1553469512","https://openalex.org/W2034940213","https://openalex.org/W2127141656","https://openalex.org/W2168231600","https://openalex.org/W2212465773","https://openalex.org/W2407023693","https://openalex.org/W2407793339","https://openalex.org/W2602634800","https://openalex.org/W2769205094","https://openalex.org/W2775572503","https://openalex.org/W2787752687","https://openalex.org/W2797759721","https://openalex.org/W2889511491","https://openalex.org/W2913340405","https://openalex.org/W2930364467","https://openalex.org/W2953219395","https://openalex.org/W2955926808","https://openalex.org/W2962707338","https://openalex.org/W2972402291","https://openalex.org/W2972677972","https://openalex.org/W2972951785","https://openalex.org/W2973226577","https://openalex.org/W6684859321","https://openalex.org/W6688089860","https://openalex.org/W6714171909","https://openalex.org/W6744661987","https://openalex.org/W6747003534","https://openalex.org/W6843142645"],"related_works":["https://openalex.org/W2317723112","https://openalex.org/W2475724061","https://openalex.org/W2773393136","https://openalex.org/W2174706483","https://openalex.org/W2997121352","https://openalex.org/W419536403","https://openalex.org/W2506280730","https://openalex.org/W4237969969","https://openalex.org/W1594297642","https://openalex.org/W2366328218"],"abstract_inverted_index":{"We":[0,66,129,163],"describe":[1,67],"the":[2,25,40,82,93,107,110,121,160,179,191,198,207],"design":[3,124],"of":[4,60,125,154,212],"a":[5,48,58,68,73,86,133,142,150,170,210],"voice":[6],"trigger":[7,52,61,83,111],"detection":[8,87],"system":[9],"for":[10,47,81,159],"smart":[11],"speakers.":[12],"In":[13,113],"this":[14,91,114],"study,":[15,115],"we":[16,116,148],"address":[17],"two":[18],"major":[19],"challenges.":[20],"The":[21],"first":[22],"is":[23,54,76,88,97],"that":[24,106,137,156,197],"detectors":[26],"are":[27,157],"deployed":[28],"in":[29,57,209],"complex":[30,102],"acoustic":[31,135],"environments":[32],"with":[33],"external":[34],"noise":[35],"and":[36,79,123,182,186],"loud":[37],"playback":[38],"by":[39,99,131,203],"device":[41],"itself.":[42],"Secondly,":[43],"collecting":[44],"training":[45,64,132,145],"examples":[46,155,189],"specific":[49,63],"keyword":[50],"or":[51],"phrase":[53,62],"challenging":[55,158,213],"resulting":[56],"scarcity":[59],"data.":[65],"two-stage":[69],"cascaded":[70],"architecture":[71,122],"where":[72],"low-power":[74],"detector":[75],"always":[77],"running":[78],"listening":[80],"phrase.":[84,112],"If":[85],"made":[89],"at":[90],"stage,":[92],"candidate":[94],"audio":[95],"segment":[96,108],"re-scored":[98],"larger,":[100],"more":[101],"models":[103],"to":[104,168,172,206],"verify":[105],"contains":[109],"focus":[117],"our":[118],"attention":[119],"on":[120,178],"these":[126],"second-pass":[127],"detectors.":[128],"start":[130],"general":[134],"model":[136,171,200],"produces":[138],"phonetic":[139,176],"transcriptions":[140,177],"given":[141],"large":[143],"labelled":[144],"dataset.":[146,193],"Next,":[147],"collect":[149],"much":[151],"smaller":[152,192],"dataset":[153,181],"baseline":[161,208],"system.":[162],"then":[164],"use":[165],"multi-task":[166],"learning":[167],"train":[169],"simultaneously":[173],"produce":[174],"accurate":[175],"larger":[180],"discriminate":[183],"between":[184],"true":[185],"easily":[187],"confusable":[188],"using":[190],"Our":[194],"results":[195],"demonstrate":[196],"proposed":[199],"reduces":[201],"errors":[202],"half":[204],"compared":[205],"range":[211],"test":[214],"conditions":[215],"without":[216],"requiring":[217],"extra":[218],"parameters.":[219]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":4}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
