{"id":"https://openalex.org/W2805399811","doi":"https://doi.org/10.1109/lsp.2018.2841653","title":"Speech Activity Detection in Naturalistic Audio Environments: Fearless Steps Apollo Corpus","display_name":"Speech Activity Detection in Naturalistic Audio Environments: Fearless Steps Apollo Corpus","publication_year":2018,"publication_date":"2018-05-28","ids":{"openalex":"https://openalex.org/W2805399811","doi":"https://doi.org/10.1109/lsp.2018.2841653","mag":"2805399811"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2018.2841653","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2018.2841653","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058042844","display_name":"Lakshmish Kaushik","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lakshmish Kaushik","raw_affiliation_strings":["Center for Robust Speech Systems (CRSS), University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Robust Speech Systems (CRSS), University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111469714","display_name":"Abhijeet Sangwan","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abhijeet Sangwan","raw_affiliation_strings":["Center for Robust Speech Systems (CRSS), University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Robust Speech Systems (CRSS), University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057910370","display_name":"John H. L. Hansen","orcid":"https://orcid.org/0000-0003-1382-9929"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John H. L. Hansen","raw_affiliation_strings":["Center for Robust Speech Systems (CRSS), University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":"https://orcid.org/0000-0003-1382-9929","affiliations":[{"raw_affiliation_string":"Center for Robust Speech Systems (CRSS), University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3302,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.81587695,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"25","issue":"9","first_page":"1290","last_page":"1294"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/apollo","display_name":"Apollo","score":0.790256679058075},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7586972713470459},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5729579925537109},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5366677045822144},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.5006692409515381},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4958074986934662},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48936909437179565},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.47477981448173523},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.4474864602088928},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.19732871651649475},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.10822978615760803}],"concepts":[{"id":"https://openalex.org/C2779821442","wikidata":"https://www.wikidata.org/wiki/Q41633","display_name":"Apollo","level":2,"score":0.790256679058075},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7586972713470459},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5729579925537109},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5366677045822144},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.5006692409515381},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4958074986934662},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48936909437179565},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.47477981448173523},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.4474864602088928},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.19732871651649475},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.10822978615760803},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C194257627","wikidata":"https://www.wikidata.org/wiki/Q211554","display_name":"Amplifier","level":3,"score":0.0},{"id":"https://openalex.org/C90856448","wikidata":"https://www.wikidata.org/wiki/Q431","display_name":"Zoology","level":1,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2018.2841653","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2018.2841653","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7900000214576721}],"awards":[{"id":"https://openalex.org/G8003058278","display_name":null,"funder_award_id":"1219130","funder_id":"https://openalex.org/F4320335353","funder_display_name":"National Science Foundation of Sri Lanka"},{"id":"https://openalex.org/G8087301405","display_name":null,"funder_award_id":"FA8750-15-1-0205","funder_id":"https://openalex.org/F4320338294","funder_display_name":"Air Force Research Laboratory"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306101","display_name":"National Aeronautics and Space Administration","ror":"https://ror.org/027ka1x80"},{"id":"https://openalex.org/F4320327708","display_name":"University of Texas at Dallas","ror":"https://ror.org/049emcs32"},{"id":"https://openalex.org/F4320335353","display_name":"National Science Foundation of Sri Lanka","ror":"https://ror.org/010xaa060"},{"id":"https://openalex.org/F4320338294","display_name":"Air Force Research Laboratory","ror":"https://ror.org/02e2egq70"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1505351954","https://openalex.org/W1524333225","https://openalex.org/W1536583098","https://openalex.org/W1661756259","https://openalex.org/W1985242443","https://openalex.org/W1999454387","https://openalex.org/W2020954030","https://openalex.org/W2023098317","https://openalex.org/W2023582935","https://openalex.org/W2036478976","https://openalex.org/W2048497537","https://openalex.org/W2059203007","https://openalex.org/W2085628288","https://openalex.org/W2115717467","https://openalex.org/W2129120544","https://openalex.org/W2152395643","https://openalex.org/W2152710595","https://openalex.org/W2170389424","https://openalex.org/W2182389375","https://openalex.org/W2296073425","https://openalex.org/W2395659169","https://openalex.org/W2398020690","https://openalex.org/W2406226915","https://openalex.org/W2406262283","https://openalex.org/W2747471357","https://openalex.org/W2748128064","https://openalex.org/W2753897885","https://openalex.org/W2766245080","https://openalex.org/W2963390466","https://openalex.org/W2963947576","https://openalex.org/W6631362777","https://openalex.org/W6636969168","https://openalex.org/W6685951362","https://openalex.org/W6713752823","https://openalex.org/W6713986472"],"related_works":["https://openalex.org/W2054178368","https://openalex.org/W2001637022","https://openalex.org/W2921637885","https://openalex.org/W4226493464","https://openalex.org/W4312417841","https://openalex.org/W3193565141","https://openalex.org/W3133861977","https://openalex.org/W3167935049","https://openalex.org/W3029198973","https://openalex.org/W1571003708"],"abstract_inverted_index":{"Speech":[0],"activity":[1],"detection":[2],"(SAD)":[3],"is":[4,24,148],"a":[5,25,137,168,201],"fundamental":[6],"building":[7],"block":[8],"for":[9],"most":[10],"spoken":[11],"language":[12],"technology":[13],"systems.":[14],"Developing":[15],"efficient":[16,151],"SAD":[17,33,205],"systems":[18],"in":[19,112,167,204],"highly":[20,134],"naturalist":[21],"data":[22,40,43,56,164,194],"scenarios":[23,125],"challenge.":[26],"In":[27,88],"this":[28,89],"study,":[29,90],"we":[30,91,179],"investigate":[31],"the":[32,154,172,175,184,197],"problem":[34],"on":[35,189],"NASAs":[36],"Apollo":[37,42,55,121,192],"space":[38],"mission":[39],"[1].":[41],"consists":[44],"of":[45,74,85,158],"long-term":[46,113,155],"naturalistic":[47],"audio":[48,114,122,193],"recordings":[49],"(i.e.,":[50],"6-12":[51],"day":[52],"missions).":[53],"The":[54],"poses":[57],"various":[58],"challenges":[59],"like:":[60],"1)":[61],"noise":[62],"distortion":[63],"with":[64],"variable":[65,108],"SNR,":[66],"2)":[67],"channel":[68],"distortion,":[69],"3)":[70],"very":[71],"high":[72],"density":[73,110],"speech,":[75,80],"4)":[76],"foreground":[77,127],"versus":[78,128],"background":[79,129],"and":[81,133,160],"5)":[82],"extended":[83],"periods":[84],"nonspeech":[86],"activity.":[87],"use":[92,180],"threshold":[93],"optimized":[94],"combo-SAD":[95],"[21]":[96],"as":[97,183],"our":[98],"baseline":[99],"unsupervised":[100],"system.":[101],"This":[102,150],"technique":[103],"was":[104],"developed":[105],"to":[106,120],"address":[107],"speech/nonspeech":[109],"issues":[111,118],"data.":[115],"To":[116],"mitigate":[117],"related":[119],"loops,":[123,132],"multispeaker":[124],"including":[126],"conversations":[130],"within":[131],"noisy":[135],"background,":[136],"new":[138],"curriculum":[139],"learning":[140,156,176,185],"(CL)":[141],"based":[142],"convolutional":[143],"neural":[144],"network":[145],"(CNN)":[146],"model":[147],"developed.":[149],"method":[152],"leverages":[153],"capability":[157],"CNN":[159],"CL":[161],"strategies":[162],"where":[163],"are":[165],"trained":[166],"manner":[169],"that":[170,196],"improves":[171],"efficiency":[173],"during":[174],"process.":[177],"Here,":[178],"signal-to-noise":[181],"ratio":[182],"parameter.":[186],"Our":[187],"experiments":[188],"free":[190],"flowing":[191],"show":[195],"proposed":[198],"approach":[199],"provides":[200],"significant":[202],"improvement":[203],"performance":[206],"(>":[207],"10%).":[208]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
