{"id":"https://openalex.org/W4392904370","doi":"https://doi.org/10.1109/icassp48485.2024.10447577","title":"Perceptually-Motivated Spatial Audio Codec for Higher-Order Ambisonics Compression","display_name":"Perceptually-Motivated Spatial Audio Codec for Higher-Order Ambisonics Compression","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392904370","doi":"https://doi.org/10.1109/icassp48485.2024.10447577"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447577","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447577","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://research.aalto.fi/en/publications/4973f573-6612-44e4-9c59-f84366cf198c","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061205893","display_name":"Christoph Hold","orcid":"https://orcid.org/0000-0001-6579-265X"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Christoph Hold","raw_affiliation_strings":["Aalto University,Acoustics Lab,Dept. Information and Communications Eng.,Espoo,Finland","Dept. Information and Communications Eng., Acoustics Lab, Aalto University, Espoo, Finland"],"affiliations":[{"raw_affiliation_string":"Aalto University,Acoustics Lab,Dept. Information and Communications Eng.,Espoo,Finland","institution_ids":["https://openalex.org/I9927081"]},{"raw_affiliation_string":"Dept. Information and Communications Eng., Acoustics Lab, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033768731","display_name":"Leo McCormack","orcid":"https://orcid.org/0000-0003-2448-4444"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Leo McCormack","raw_affiliation_strings":["Aalto University,Acoustics Lab,Dept. Information and Communications Eng.,Espoo,Finland","Dept. Information and Communications Eng., Acoustics Lab, Aalto University, Espoo, Finland"],"affiliations":[{"raw_affiliation_string":"Aalto University,Acoustics Lab,Dept. Information and Communications Eng.,Espoo,Finland","institution_ids":["https://openalex.org/I9927081"]},{"raw_affiliation_string":"Dept. Information and Communications Eng., Acoustics Lab, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010846139","display_name":"Archontis Politis","orcid":"https://orcid.org/0000-0002-0595-2356"},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Archontis Politis","raw_affiliation_strings":["Tampere University,Faculty of Information Technology and Communication Sciences,Finland","Faculty of Information Technology and Communication Sciences, Tampere University, Finland"],"affiliations":[{"raw_affiliation_string":"Tampere University,Faculty of Information Technology and Communication Sciences,Finland","institution_ids":["https://openalex.org/I166825849"]},{"raw_affiliation_string":"Faculty of Information Technology and Communication Sciences, Tampere University, Finland","institution_ids":["https://openalex.org/I166825849"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033040947","display_name":"Ville Pulkki","orcid":"https://orcid.org/0000-0003-3460-9677"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Ville Pulkki","raw_affiliation_strings":["Aalto University,Acoustics Lab,Dept. Information and Communications Eng.,Espoo,Finland","Dept. Information and Communications Eng., Acoustics Lab, Aalto University, Espoo, Finland"],"affiliations":[{"raw_affiliation_string":"Aalto University,Acoustics Lab,Dept. Information and Communications Eng.,Espoo,Finland","institution_ids":["https://openalex.org/I9927081"]},{"raw_affiliation_string":"Dept. Information and Communications Eng., Acoustics Lab, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5061205893"],"corresponding_institution_ids":["https://openalex.org/I9927081"],"apc_list":null,"apc_paid":null,"fwci":1.053,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.73387439,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1121","last_page":"1125"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ambisonics","display_name":"Ambisonics","score":0.9837909936904907},{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.835076093673706},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7597431540489197},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5582794547080994},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5213314890861511},{"id":"https://openalex.org/keywords/dynamic-range-compression","display_name":"Dynamic range compression","score":0.5031799674034119},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.49334731698036194},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.4461610019207001},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.4399990141391754},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.4270172715187073},{"id":"https://openalex.org/keywords/wideband-audio","display_name":"Wideband audio","score":0.42079320549964905},{"id":"https://openalex.org/keywords/digital-audio","display_name":"Digital audio","score":0.376669704914093},{"id":"https://openalex.org/keywords/loudspeaker","display_name":"Loudspeaker","score":0.30873504281044006},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.24717608094215393},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.22207477688789368},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.15235239267349243},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.1375756561756134},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08695083856582642}],"concepts":[{"id":"https://openalex.org/C47726159","wikidata":"https://www.wikidata.org/wiki/Q457547","display_name":"Ambisonics","level":3,"score":0.9837909936904907},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.835076093673706},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7597431540489197},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5582794547080994},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5213314890861511},{"id":"https://openalex.org/C150178126","wikidata":"https://www.wikidata.org/wiki/Q18433212","display_name":"Dynamic range compression","level":2,"score":0.5031799674034119},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.49334731698036194},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.4461610019207001},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.4399990141391754},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.4270172715187073},{"id":"https://openalex.org/C173391809","wikidata":"https://www.wikidata.org/wiki/Q2740189","display_name":"Wideband audio","level":5,"score":0.42079320549964905},{"id":"https://openalex.org/C87687168","wikidata":"https://www.wikidata.org/wiki/Q173114","display_name":"Digital audio","level":4,"score":0.376669704914093},{"id":"https://openalex.org/C157138929","wikidata":"https://www.wikidata.org/wiki/Q570","display_name":"Loudspeaker","level":2,"score":0.30873504281044006},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.24717608094215393},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.22207477688789368},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.15235239267349243},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.1375756561756134},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08695083856582642},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447577","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447577","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:aaltodoc.aalto.fi:123456789/131128","is_oa":true,"landing_page_url":"https://research.aalto.fi/en/publications/4973f573-6612-44e4-9c59-f84366cf198c","pdf_url":null,"source":{"id":"https://openalex.org/S4306401662","display_name":"Aaltodoc (Aalto University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9927081","host_organization_name":"Aalto University","host_organization_lineage":["https://openalex.org/I9927081"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"acceptedVersion"}],"best_oa_location":{"id":"pmh:oai:aaltodoc.aalto.fi:123456789/131128","is_oa":true,"landing_page_url":"https://research.aalto.fi/en/publications/4973f573-6612-44e4-9c59-f84366cf198c","pdf_url":null,"source":{"id":"https://openalex.org/S4306401662","display_name":"Aaltodoc (Aalto University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9927081","host_organization_name":"Aalto University","host_organization_lineage":["https://openalex.org/I9927081"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"acceptedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1511723925","https://openalex.org/W1588397041","https://openalex.org/W1991170630","https://openalex.org/W2028075876","https://openalex.org/W2037814094","https://openalex.org/W2059450962","https://openalex.org/W2060108852","https://openalex.org/W2075546475","https://openalex.org/W2161488606","https://openalex.org/W2328926509","https://openalex.org/W2494085323","https://openalex.org/W2761804494","https://openalex.org/W2943499369","https://openalex.org/W2953950894","https://openalex.org/W2962751722","https://openalex.org/W3180801944","https://openalex.org/W3212516101","https://openalex.org/W3215930811","https://openalex.org/W3217252117","https://openalex.org/W4205788663","https://openalex.org/W4236344233","https://openalex.org/W4288854623","https://openalex.org/W4366338339","https://openalex.org/W4386764374","https://openalex.org/W4388755861","https://openalex.org/W6630773358","https://openalex.org/W6639363673","https://openalex.org/W6659849957","https://openalex.org/W6683504902","https://openalex.org/W6763156250","https://openalex.org/W6798587278"],"related_works":["https://openalex.org/W2108459488","https://openalex.org/W1534361175","https://openalex.org/W1578591928","https://openalex.org/W1988975528","https://openalex.org/W2785871231","https://openalex.org/W2111280847","https://openalex.org/W1970362090","https://openalex.org/W1857623347","https://openalex.org/W4236707033","https://openalex.org/W2166066403"],"abstract_inverted_index":{"Scene-based":[0],"spatial":[1,37,142],"audio":[2,19,75,91,97,143,150],"formats,":[3],"such":[4],"as":[5],"Ambisonics,":[6],"are":[7,83,102,117,125],"playback":[8],"system":[9],"agnostic":[10],"and":[11,108],"may":[12],"therefore":[13],"be":[14,43],"favoured":[15],"for":[16,45],"delivering":[17],"immersive":[18],"experiences":[20],"to":[21,34,128,155,160],"a":[22,52,77,95,134],"wide":[23],"range":[24],"of":[25,31,80,133],"(potentially":[26],"unknown)":[27],"devices.":[28],"The":[29,67,89,131],"number":[30,79],"channels":[32],"required":[33],"deliver":[35],"high":[36],"resolution":[38],"Ambisonic":[39,72,115],"audio,":[40],"however,":[41],"can":[42,145],"prohibitive":[44],"low-bandwidth":[46],"applications.":[47],"Therefore,":[48],"this":[49],"paper":[50],"proposes":[51],"compression":[53],"codec,":[54],"which":[55,82],"is":[56,92],"based":[57],"upon":[58],"the":[59,70,100,111,129,139,147],"parametric":[60,141],"higher-order":[61,71],"Directional":[62],"Audio":[63],"Coding":[64],"(HO-DirAC)":[65],"model.":[66],"encoder":[68],"downmixes":[69],"(HOA)":[73],"input":[74],"into":[76,104],"reduced":[78],"signals,":[81],"accompanied":[84],"by":[85],"perceptually-motivated":[86],"scene":[87],"parameters.":[88,130],"downmixed":[90],"coded":[93],"using":[94],"perceptual":[96,105,149],"coder,":[98,151],"whereas":[99],"parameters":[101],"grouped":[103],"bands,":[106],"quantized,":[107],"downsampled.":[109],"On":[110],"decoder":[112],"side,":[113],"low":[114,154],"orders":[116],"fully":[118,121],"recovered.":[119],"Not":[120],"recoverable":[122],"HOA":[123,162],"components":[124],"synthesized":[126],"according":[127],"results":[132],"listening":[135],"test":[136],"indicate":[137],"that":[138],"proposed":[140],"codec":[144],"improve":[146],"adopted":[148],"especially":[152],"at":[153],"medium-high":[156],"bitrates,":[157],"when":[158],"applied":[159],"fifth-order":[161],"signals.":[163]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-28T08:17:26.163206","created_date":"2025-10-10T00:00:00"}
