{"id":"https://openalex.org/W4404101961","doi":"https://doi.org/10.1109/jstsp.2024.3491576","title":"Perceptual Neural Audio Coding With Modified Discrete Cosine Transform","display_name":"Perceptual Neural Audio Coding With Modified Discrete Cosine Transform","publication_year":2024,"publication_date":"2024-11-06","ids":{"openalex":"https://openalex.org/W4404101961","doi":"https://doi.org/10.1109/jstsp.2024.3491576"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2024.3491576","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2024.3491576","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075112605","display_name":"Hyungseob Lim","orcid":null},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Hyungseob Lim","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100413513","display_name":"Ji\u2010Hyun Lee","orcid":"https://orcid.org/0000-0002-9864-5485"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jihyun Lee","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034035652","display_name":"Byeong Hyeon Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Byeong Hyeon Kim","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059001612","display_name":"Inseon Jang","orcid":"https://orcid.org/0000-0003-2237-2668"},"institutions":[{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Inseon Jang","raw_affiliation_strings":["Electronics and Telecommunications Research Institute, Daejeon, South Korea"],"affiliations":[{"raw_affiliation_string":"Electronics and Telecommunications Research Institute, Daejeon, South Korea","institution_ids":["https://openalex.org/I142401562"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056128107","display_name":"Hong-Goo Kang","orcid":"https://orcid.org/0000-0002-6554-0783"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hong-Goo Kang","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea","institution_ids":["https://openalex.org/I193775966"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5075112605"],"corresponding_institution_ids":["https://openalex.org/I193775966"],"apc_list":null,"apc_paid":null,"fwci":0.2632,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.56021275,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"18","issue":"8","first_page":"1490","last_page":"1505"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9812999963760376,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9812999963760376,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9765999913215637,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9695000052452087,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discrete-cosine-transform","display_name":"Discrete cosine transform","score":0.7926561832427979},{"id":"https://openalex.org/keywords/lapped-transform","display_name":"Lapped transform","score":0.7411282062530518},{"id":"https://openalex.org/keywords/modified-discrete-cosine-transform","display_name":"Modified discrete cosine transform","score":0.7208526730537415},{"id":"https://openalex.org/keywords/transform-coding","display_name":"Transform coding","score":0.7116196155548096},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6757752299308777},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.5728235840797424},{"id":"https://openalex.org/keywords/sub-band-coding","display_name":"Sub-band coding","score":0.5457042455673218},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5324141979217529},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5054192543029785},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.4820433557033539},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.44281768798828125},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.43484047055244446},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3762989938259125},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3722578287124634},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.25193947553634644},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10889938473701477},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.08636173605918884}],"concepts":[{"id":"https://openalex.org/C2221639","wikidata":"https://www.wikidata.org/wiki/Q2877","display_name":"Discrete cosine transform","level":3,"score":0.7926561832427979},{"id":"https://openalex.org/C91458471","wikidata":"https://www.wikidata.org/wiki/Q17096468","display_name":"Lapped transform","level":5,"score":0.7411282062530518},{"id":"https://openalex.org/C28726691","wikidata":"https://www.wikidata.org/wiki/Q1268231","display_name":"Modified discrete cosine transform","level":5,"score":0.7208526730537415},{"id":"https://openalex.org/C169805256","wikidata":"https://www.wikidata.org/wiki/Q1361381","display_name":"Transform coding","level":4,"score":0.7116196155548096},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6757752299308777},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5728235840797424},{"id":"https://openalex.org/C98526533","wikidata":"https://www.wikidata.org/wiki/Q1691938","display_name":"Sub-band coding","level":3,"score":0.5457042455673218},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5324141979217529},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5054192543029785},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.4820433557033539},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.44281768798828125},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.43484047055244446},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3762989938259125},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3722578287124634},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.25193947553634644},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10889938473701477},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.08636173605918884},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jstsp.2024.3491576","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2024.3491576","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W1498436455","https://openalex.org/W1552314771","https://openalex.org/W1580389772","https://openalex.org/W1670669358","https://openalex.org/W1973207880","https://openalex.org/W1995875735","https://openalex.org/W2033750332","https://openalex.org/W2060108852","https://openalex.org/W2105788792","https://openalex.org/W2112796928","https://openalex.org/W2120376959","https://openalex.org/W2129652681","https://openalex.org/W2165291881","https://openalex.org/W2552465432","https://openalex.org/W2752796333","https://openalex.org/W2768814045","https://openalex.org/W2785562966","https://openalex.org/W2963182577","https://openalex.org/W2981613960","https://openalex.org/W2982853315","https://openalex.org/W2998733532","https://openalex.org/W3037038648","https://openalex.org/W3095497211","https://openalex.org/W3097934054","https://openalex.org/W3110277971","https://openalex.org/W3145029257","https://openalex.org/W3162366763","https://openalex.org/W3179468628","https://openalex.org/W3214758449","https://openalex.org/W3215615641","https://openalex.org/W4224916783","https://openalex.org/W4242218465","https://openalex.org/W4254592580","https://openalex.org/W4301513688","https://openalex.org/W4307323391","https://openalex.org/W4312806968","https://openalex.org/W4372267315","https://openalex.org/W4375869436","https://openalex.org/W4381786045","https://openalex.org/W4385245566","https://openalex.org/W4385822272","https://openalex.org/W4392904681","https://openalex.org/W6629271464","https://openalex.org/W6630442970","https://openalex.org/W6631090947","https://openalex.org/W6631257896","https://openalex.org/W6635534829","https://openalex.org/W6695676441","https://openalex.org/W6741057705","https://openalex.org/W6755977528","https://openalex.org/W6757817989","https://openalex.org/W6758867260","https://openalex.org/W6762114000","https://openalex.org/W6773772901","https://openalex.org/W6779823529","https://openalex.org/W6780226713","https://openalex.org/W6780365925","https://openalex.org/W6853515095"],"related_works":["https://openalex.org/W2978563117","https://openalex.org/W2046080157","https://openalex.org/W2382580345","https://openalex.org/W2173905799","https://openalex.org/W3152310464","https://openalex.org/W2120611566","https://openalex.org/W1594300462","https://openalex.org/W1967066785","https://openalex.org/W2156477611","https://openalex.org/W1915466985"],"abstract_inverted_index":{"Despite":[0],"efforts":[1],"to":[2,25,78,220],"leverage":[3],"the":[4,31,59,62,74,82,86,94,110,133,148,166,178,207,217],"modeling":[5],"power":[6],"of":[7,34,61,73,114],"deep":[8],"neural":[9,192,222],"networks":[10],"(DNNs)":[11],"in":[12,18,65,85],"audio":[13,54,168,185,193,223],"coding,":[14],"effectively":[15,171],"deploying":[16],"them":[17],"real-world":[19],"applications":[20],"is":[21,77,136,151],"still":[22],"problematic":[23],"due":[24],"their":[26,175],"high":[27],"computational":[28,214],"cost":[29],"and":[30,139,188,190],"restricted":[32],"range":[33],"target":[35],"signals":[36],"or":[37],"achievable":[38],"bit-rates.":[39],"In":[40],"this":[41],"paper,":[42],"we":[43,196],"propose":[44],"an":[45,118],"alternative":[46],"approach":[47],"for":[48,58,107],"integrating":[49],"DNNs":[50,80,142],"into":[51],"a":[52,66,103,124,154,191,211],"perceptual":[53,149,155,179],"coder":[55],"that":[56,165,198],"allows":[57],"optimization":[60],"whole":[63],"system":[64],"data-driven,":[67],"end-to-end":[68],"manner.":[69],"The":[70,100,129],"key":[71],"idea":[72],"proposed":[75,167],"method":[76,200],"make":[79],"control":[81],"quantization":[83,111,135],"noise":[84],"classic":[87],"transform":[88,98],"coding":[89,204],"framework,":[90],"specifically":[91],"based":[92],"on":[93,160,177,216],"modified":[95],"discrete":[96],"cosine":[97],"(MDCT).":[99],"proposal":[101],"includes":[102],"new":[104],"DNN-based":[105],"mechanism":[106],"adaptively":[108],"adjusting":[109],"step":[112],"sizes":[113],"frequency":[115],"bands":[116],"targeting":[117],"arbitrary":[119],"bit-rate,":[120],"eventually":[121],"acting":[122],"as":[123],"data-driven":[125],"differentiable":[126],"psychoacoustic":[127],"model.":[128],"side":[130],"information":[131],"regarding":[132],"adaptive":[134],"also":[137],"encoded":[138],"decoded":[140],"by":[141,153],"via":[143],"learned":[144],"representation.":[145],"During":[146],"training,":[147],"distortion":[150],"evaluated":[152],"quality":[156],"estimation":[157],"model":[158],"trained":[159],"actual":[161],"human":[162],"ratings":[163],"so":[164],"codec":[169,194],"can":[170,201],"allocate":[172],"bits":[173],"considering":[174],"effect":[176],"quality.":[180],"Through":[181],"comparisons":[182],"with":[183,210],"legacy":[184,208],"codecs":[186,209],"(MP3":[187],"AAC)":[189],"(EnCodec),":[195],"show":[197],"our":[199],"achieve":[202],"further":[203],"gains":[205],"over":[206],"substantially":[212],"lower":[213],"load":[215],"decoder":[218],"compared":[219],"other":[221],"codecs.":[224]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
