{"id":"https://openalex.org/W2947862783","doi":"https://doi.org/10.1109/icdipc.2019.8723681","title":"SSQA: Speech Signal Quality Assessment Method using Spectrogram and 2-D Convolutional Neural Networks for Improving Efficiency of ASR Devices","display_name":"SSQA: Speech Signal Quality Assessment Method using Spectrogram and 2-D Convolutional Neural Networks for Improving Efficiency of ASR Devices","publication_year":2019,"publication_date":"2019-05-01","ids":{"openalex":"https://openalex.org/W2947862783","doi":"https://doi.org/10.1109/icdipc.2019.8723681","mag":"2947862783"},"language":"en","primary_location":{"id":"doi:10.1109/icdipc.2019.8723681","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdipc.2019.8723681","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Seventh International Conference on Digital Information Processing and Communications (ICDIPC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061573210","display_name":"Pooja Kumawat","orcid":"https://orcid.org/0000-0002-4963-7366"},"institutions":[{"id":"https://openalex.org/I99729588","display_name":"Indian Institute of Technology Bhubaneswar","ror":"https://ror.org/04gx72j20","country_code":"IN","type":"education","lineage":["https://openalex.org/I99729588"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Pooja Kumawat","raw_affiliation_strings":["Real-time Embedded Signal Processing Lab, Indian Institute of Technology Bhubaneswar, Jatani, Odisha, INDIA","Real-time Embedded Signal Processing Lab, Indian Institute of Technology Bhubaneswar, Jatani, Odisha, 752050, INDIA"],"affiliations":[{"raw_affiliation_string":"Real-time Embedded Signal Processing Lab, Indian Institute of Technology Bhubaneswar, Jatani, Odisha, INDIA","institution_ids":["https://openalex.org/I99729588"]},{"raw_affiliation_string":"Real-time Embedded Signal Processing Lab, Indian Institute of Technology Bhubaneswar, Jatani, Odisha, 752050, INDIA","institution_ids":["https://openalex.org/I99729588"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062664549","display_name":"M. Sabarimalai Manikandan","orcid":"https://orcid.org/0000-0001-6878-4911"},"institutions":[{"id":"https://openalex.org/I99729588","display_name":"Indian Institute of Technology Bhubaneswar","ror":"https://ror.org/04gx72j20","country_code":"IN","type":"education","lineage":["https://openalex.org/I99729588"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"M. Sabarimalai Manikandan","raw_affiliation_strings":["Real-time Embedded Signal Processing Lab, Indian Institute of Technology Bhubaneswar, Jatani, Odisha, INDIA","Real-time Embedded Signal Processing Lab, Indian Institute of Technology Bhubaneswar, Jatani, Odisha, 752050, INDIA"],"affiliations":[{"raw_affiliation_string":"Real-time Embedded Signal Processing Lab, Indian Institute of Technology Bhubaneswar, Jatani, Odisha, INDIA","institution_ids":["https://openalex.org/I99729588"]},{"raw_affiliation_string":"Real-time Embedded Signal Processing Lab, Indian Institute of Technology Bhubaneswar, Jatani, Odisha, 752050, INDIA","institution_ids":["https://openalex.org/I99729588"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5061573210"],"corresponding_institution_ids":["https://openalex.org/I99729588"],"apc_list":null,"apc_paid":null,"fwci":0.6634,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.67761139,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"29","last_page":"34"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8248502612113953},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7976169586181641},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7963885068893433},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6588472127914429},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.6416348218917847},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5949822068214417},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.5307495594024658},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.486270546913147},{"id":"https://openalex.org/keywords/noise-measurement","display_name":"Noise measurement","score":0.4776807427406311},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4561980366706848},{"id":"https://openalex.org/keywords/background-noise","display_name":"Background noise","score":0.4515399932861328},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42340704798698425},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38041746616363525},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.24016982316970825},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.06400823593139648}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8248502612113953},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7976169586181641},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7963885068893433},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6588472127914429},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.6416348218917847},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5949822068214417},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.5307495594024658},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.486270546913147},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.4776807427406311},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4561980366706848},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.4515399932861328},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42340704798698425},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38041746616363525},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.24016982316970825},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.06400823593139648},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icdipc.2019.8723681","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdipc.2019.8723681","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Seventh International Conference on Digital Information Processing and Communications (ICDIPC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5600000023841858,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1980759728","https://openalex.org/W1995562189","https://openalex.org/W2009934439","https://openalex.org/W2015337779","https://openalex.org/W2035576074","https://openalex.org/W2062164080","https://openalex.org/W2296167893","https://openalex.org/W2326699523","https://openalex.org/W2491899193","https://openalex.org/W2583851971","https://openalex.org/W2760287881","https://openalex.org/W2797187289","https://openalex.org/W2901212597","https://openalex.org/W2964245029"],"related_works":["https://openalex.org/W4214896311","https://openalex.org/W1975881626","https://openalex.org/W4252208810","https://openalex.org/W2418631473","https://openalex.org/W2277237561","https://openalex.org/W2364132456","https://openalex.org/W2390182490","https://openalex.org/W1969920786","https://openalex.org/W2183143893","https://openalex.org/W2357471906"],"abstract_inverted_index":{"Most":[0],"on-device":[1,70],"and":[2,37,71,89,108,136,163,170,193,238,262,266],"cloud":[3,78,276],"processing":[4,69,277],"based":[5,84,129,278],"automatic":[6],"speech":[7,19,52,66,107,110,148,165,172,234,240,267],"recognition":[8,13,268],"(ASR)":[9],"systems":[10],"had":[11,131,141],"poor":[12],"performance":[14],"due":[15],"to":[16,76,176,182,201,212,230],"the":[17,61,73,77,86,127,146,158,168,177,184,188,203,208,220,228,232,252,255,258,264,273],"noisy":[18,109,147,164,171],"signals":[20,111,173],"corrupted":[21,114],"by":[22],"various":[23,116],"kinds":[24,117],"of":[25,63,105,118,161,205,227,254,257,275],"background":[26],"noises":[27,39,119,229],"such":[28],"as":[29],"vehicle,":[30],"train,":[31],"aircraft,":[32],"fan,":[33],"wind,":[34],"rain,":[35],"air-conditioner,":[36],"machinery":[38],"which":[40,112],"are":[41,113,174],"unavoidable":[42],"realistic":[43],"scenarios.":[44],"In":[45,217],"this":[46,218],"paper,":[47],"we":[48],"propose":[49],"a":[50,64,102],"novel":[51],"signal":[53,67,235],"quality":[54,62],"assessment":[55],"(SSQA)":[56],"method":[57,82,98,130,140],"for":[58,225],"automatically":[59],"assessing":[60],"recorded":[65,74,233],"before":[68],"sending":[72],"data":[75],"server.":[79],"The":[80,95,139,242],"proposed":[81,96,243],"is":[83,99,154,223],"on":[85],"spectrogram":[87],"feature":[88],"two-dimensional":[90],"convolutional":[91],"neural":[92],"networks":[93],"(2D-CNNs).":[94],"SSQA":[97],"evaluated":[100],"using":[101],"large":[103],"scale":[104],"noise-free":[106,162,169],"with":[115,120],"different":[121],"noise":[122,206,221],"levels.":[123],"Results":[124,150],"show":[125],"that":[126,152],"2D-CNN":[128],"an":[132],"average":[133],"Se=90.92%,":[134],"Sp=98.44%":[135],"OA":[137],"=96.44%.":[138],"better":[142],"results":[143],"in":[144,156,250,272],"detecting":[145],"segments.":[149,166,241],"showed":[151],"there":[153],"confusion":[155],"performing":[157],"manual":[159],"labelling":[160],"Therefore,":[167],"given":[175],"publicly":[178],"available":[179],"ASR":[180,209,245,260,279],"system":[181,210,246],"obtain":[183],"corresponding":[185],"text.":[186,216],"Then":[187],"word":[189],"error":[190,195],"rate":[191,196],"(WER)":[192],"character":[194],"(CER)":[197],"metrics":[198],"were":[199],"used":[200],"know":[202],"level":[204,222],"wherein":[207],"fails":[211],"correctly":[213],"recognize":[214],"its":[215],"way,":[219],"determined":[224],"each":[226],"label":[231],"into":[236],"acceptable":[237],"unacceptable":[239],"quality-aware":[244],"has":[247],"great":[248],"potential":[249],"improving":[251],"lifetime":[253],"battery":[256],"portable":[259],"devices":[261],"reducing":[263],"bandwidth":[265],"software":[269],"utilization":[270],"costs":[271],"case":[274],"system.":[280]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
