{"id":"https://openalex.org/W3174660791","doi":"https://doi.org/10.1109/qomex51781.2021.9465476","title":"Towards Blind Audio Quality Assessment using a Convolutional-Recurrent Neural Network","display_name":"Towards Blind Audio Quality Assessment using a Convolutional-Recurrent Neural Network","publication_year":2021,"publication_date":"2021-06-14","ids":{"openalex":"https://openalex.org/W3174660791","doi":"https://doi.org/10.1109/qomex51781.2021.9465476","mag":"3174660791"},"language":"en","primary_location":{"id":"doi:10.1109/qomex51781.2021.9465476","is_oa":false,"landing_page_url":"https://doi.org/10.1109/qomex51781.2021.9465476","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 13th International Conference on Quality of Multimedia Experience (QoMEX)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034844592","display_name":"Xiguang Zheng","orcid":"https://orcid.org/0000-0003-3103-9090"},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]},{"id":"https://openalex.org/I4210155967","display_name":"OriginWater (China)","ror":"https://ror.org/04h7gmn81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210155967"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiguang Zheng","raw_affiliation_strings":["Kuaishou Technology, Co,Beijing,China","Kuaishou Technology, Co, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Co,Beijing,China","institution_ids":["https://openalex.org/I4401726859"]},{"raw_affiliation_string":"Kuaishou Technology, Co, Beijing, China","institution_ids":["https://openalex.org/I4210155967","https://openalex.org/I4401726859"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100374052","display_name":"Chen Zhang","orcid":"https://orcid.org/0000-0001-5773-9090"},"institutions":[{"id":"https://openalex.org/I4210155967","display_name":"OriginWater (China)","ror":"https://ror.org/04h7gmn81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210155967"]},{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chen Zhang","raw_affiliation_strings":["Kuaishou Technology, Co,Beijing,China","Kuaishou Technology, Co, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Co,Beijing,China","institution_ids":["https://openalex.org/I4401726859"]},{"raw_affiliation_string":"Kuaishou Technology, Co, Beijing, China","institution_ids":["https://openalex.org/I4210155967","https://openalex.org/I4401726859"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5034844592"],"corresponding_institution_ids":["https://openalex.org/I4210155967","https://openalex.org/I4401726859"],"apc_list":null,"apc_paid":null,"fwci":0.7618,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.70398918,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"45","issue":null,"first_page":"91","last_page":"96"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8350949287414551},{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.7730233669281006},{"id":"https://openalex.org/keywords/sound-quality","display_name":"Sound quality","score":0.6614422798156738},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.5997688174247742},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5984253883361816},{"id":"https://openalex.org/keywords/variable-bitrate","display_name":"Variable bitrate","score":0.516218900680542},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5112783908843994},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.473846971988678},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46944189071655273},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33681148290634155},{"id":"https://openalex.org/keywords/bit-rate","display_name":"Bit rate","score":0.1703900396823883},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.08697867393493652}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8350949287414551},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.7730233669281006},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.6614422798156738},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.5997688174247742},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5984253883361816},{"id":"https://openalex.org/C57667952","wikidata":"https://www.wikidata.org/wiki/Q1154443","display_name":"Variable bitrate","level":3,"score":0.516218900680542},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5112783908843994},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.473846971988678},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46944189071655273},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33681148290634155},{"id":"https://openalex.org/C2987812609","wikidata":"https://www.wikidata.org/wiki/Q194158","display_name":"Bit rate","level":2,"score":0.1703900396823883},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.08697867393493652},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/qomex51781.2021.9465476","is_oa":false,"landing_page_url":"https://doi.org/10.1109/qomex51781.2021.9465476","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 13th International Conference on Quality of Multimedia Experience (QoMEX)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1496883935","https://openalex.org/W1523644733","https://openalex.org/W1970308149","https://openalex.org/W2004478479","https://openalex.org/W2040151471","https://openalex.org/W2077084699","https://openalex.org/W2133824856","https://openalex.org/W2194775991","https://openalex.org/W2408292770","https://openalex.org/W2621651613","https://openalex.org/W2658430471","https://openalex.org/W2809971434","https://openalex.org/W2920683851","https://openalex.org/W2939156090","https://openalex.org/W2963403924","https://openalex.org/W3015594652","https://openalex.org/W3015735241","https://openalex.org/W4301513688","https://openalex.org/W6631257896","https://openalex.org/W6738953990"],"related_works":["https://openalex.org/W2374470469","https://openalex.org/W2352448339","https://openalex.org/W2787189739","https://openalex.org/W2110375858","https://openalex.org/W1745769461","https://openalex.org/W2899318368","https://openalex.org/W1968563888","https://openalex.org/W2159899134","https://openalex.org/W2363660481","https://openalex.org/W2089575384"],"abstract_inverted_index":{"Blind":[0],"estimation":[1],"of":[2,23,119,171],"audio":[3,14,25,53,62,139,190,215],"quality":[4,21,140,191],"is":[5,16,88,131],"desired":[6],"for":[7,169,206],"practical":[8],"applications":[9],"since":[10],"the":[11,24,31,39,52,56,60,68,78,98,101,106,112,120,149,153,158,162,172,182,186,194,207],"original":[12],"reference":[13],"signal":[15],"sometimes":[17],"unavailable.":[18],"The":[19,128],"subjective":[20],"degradation":[22],"signals":[26,54],"can":[27,104,165,197],"be":[28],"caused":[29],"by":[30,55],"low":[32],"bitrate":[33,108,155],"compression":[34,37],"and":[35,42,73,124],"multiple":[36],"during":[38,77],"content":[40],"submission":[41],"distribution":[43],"stages.":[44],"Existing":[45],"methods":[46],"have":[47],"been":[48,145],"proposed":[49,89,102,129,163,195],"to":[50,90,97,134,181,201],"classify":[51,202],"encoding":[57,79,121],"bitrates":[58,94,173],"with":[59],"informed":[61],"codec":[63],"name":[64],"as":[65,67],"well":[66],"estimated":[69],"MDCT":[70,113],"framing":[71,122,177],"grid":[72,123,178],"window":[74,125],"type":[75,126],"sequences":[76],"stage.":[80],"In":[81],"this":[82],"work,":[83],"a":[84],"convolutional-recurrent":[85],"neural":[86],"network":[87],"perform":[91,105,135],"blind":[92],"AAC":[93,107,154],"classification.":[95],"Compared":[96],"existing":[99,150,183],"methods,":[100],"method":[103,130,164,196],"classification":[109,156,192],"directly":[110],"from":[111],"coefficients":[114],"without":[115,174],"any":[116,176],"prior":[117],"knowledge":[118],"sequences.":[127],"further":[132],"extended":[133],"multi-codec":[136,187],"bitrate-related":[137,188],"perceptual":[138,189,204],"classification,":[141],"which":[142],"has":[143],"not":[144],"extensively":[146],"studied":[147],"in":[148],"literature.":[151],"For":[152,185],"task,":[157,193],"evaluation":[159],"results":[160],"show":[161],"achieve":[166,198],"similar":[167],"accuracy":[168,200],"most":[170],"using":[175,212],"information":[179],"compared":[180],"methods.":[184],"95%":[199],"three":[203,213],"classes":[205],"double":[208],"compressed":[209],"unseen":[210],"data":[211],"common":[214],"codecs.":[216]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
