{"id":"https://openalex.org/W4416242918","doi":"https://doi.org/10.48550/arxiv.2508.07829","title":"Auditory Intelligence: Understanding the World Through Sound","display_name":"Auditory Intelligence: Understanding the World Through Sound","publication_year":2025,"publication_date":"2025-08-11","ids":{"openalex":"https://openalex.org/W4416242918","doi":"https://doi.org/10.48550/arxiv.2508.07829"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2508.07829","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.07829","pdf_url":"https://arxiv.org/pdf/2508.07829","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2508.07829","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068022654","display_name":"Hyeonuk Nam","orcid":"https://orcid.org/0000-0002-1169-5640"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nam, Hyeonuk","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5068022654"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.39149999618530273,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.39149999618530273,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10788","display_name":"Neuroscience and Music Perception","score":0.12890000641345978,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.037700001150369644,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/situated","display_name":"Situated","score":0.5845999717712402},{"id":"https://openalex.org/keywords/cognitive-reframing","display_name":"Cognitive reframing","score":0.5813999772071838},{"id":"https://openalex.org/keywords/selective-auditory-attention","display_name":"Selective auditory attention","score":0.5756000280380249},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.513700008392334},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.511900007724762},{"id":"https://openalex.org/keywords/auditory-scene-analysis","display_name":"Auditory scene analysis","score":0.5001999735832214},{"id":"https://openalex.org/keywords/sound","display_name":"Sound (geography)","score":0.49540001153945923},{"id":"https://openalex.org/keywords/computational-auditory-scene-analysis","display_name":"Computational auditory scene analysis","score":0.4781000018119812},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4643000066280365}],"concepts":[{"id":"https://openalex.org/C132829578","wikidata":"https://www.wikidata.org/wiki/Q581151","display_name":"Situated","level":2,"score":0.5845999717712402},{"id":"https://openalex.org/C187029079","wikidata":"https://www.wikidata.org/wiki/Q958679","display_name":"Cognitive reframing","level":2,"score":0.5813999772071838},{"id":"https://openalex.org/C100142294","wikidata":"https://www.wikidata.org/wiki/Q2070426","display_name":"Selective auditory attention","level":4,"score":0.5756000280380249},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.558899998664856},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.513700008392334},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.511900007724762},{"id":"https://openalex.org/C38129911","wikidata":"https://www.wikidata.org/wiki/Q4820038","display_name":"Auditory scene analysis","level":3,"score":0.5001999735832214},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.49540001153945923},{"id":"https://openalex.org/C73208851","wikidata":"https://www.wikidata.org/wiki/Q5157303","display_name":"Computational auditory scene analysis","level":2,"score":0.4781000018119812},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4643000066280365},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4000000059604645},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.3880000114440918},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.38679999113082886},{"id":"https://openalex.org/C3020799230","wikidata":"https://www.wikidata.org/wiki/Q160289","display_name":"Auditory perception","level":3,"score":0.33980000019073486},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.3070000112056732},{"id":"https://openalex.org/C142795923","wikidata":"https://www.wikidata.org/wiki/Q1358257","display_name":"Soundscape","level":3,"score":0.3066999912261963},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.30399999022483826},{"id":"https://openalex.org/C171179263","wikidata":"https://www.wikidata.org/wiki/Q4820026","display_name":"Auditory display","level":2,"score":0.30059999227523804},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2955000102519989},{"id":"https://openalex.org/C77801330","wikidata":"https://www.wikidata.org/wiki/Q1363413","display_name":"Auditory masking","level":3,"score":0.2809999883174896},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2741999924182892},{"id":"https://openalex.org/C2777443451","wikidata":"https://www.wikidata.org/wiki/Q821413","display_name":"Auditory system","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.2646999955177307},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.2578999996185303},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.25290000438690186},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.2524000108242035},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25029999017715454}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2508.07829","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.07829","pdf_url":"https://arxiv.org/pdf/2508.07829","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2508.07829","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.07829","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2508.07829","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.07829","pdf_url":"https://arxiv.org/pdf/2508.07829","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"progress":[1],"in":[2,48],"auditory":[3,56,85,112],"intelligence":[4,57],"has":[5],"yielded":[6],"high-performing":[7],"systems":[8],"for":[9,126],"sound":[10],"event":[11],"detection":[12],"(SED),":[13],"acoustic":[14],"scene":[15],"classification":[16],"(ASC),":[17],"automated":[18],"audio":[19,23],"captioning":[20],"(AAC),":[21],"and":[22,67,82,96,110,114],"question":[24],"answering":[25],"(AQA).":[26],"Yet":[27],"these":[28,101],"tasks":[29],"remain":[30],"largely":[31],"constrained":[32],"to":[33,117,128],"surface-level":[34],"recognition-capturing":[35],"what":[36,41,123],"happened":[37],"but":[38],"not":[39],"why,":[40],"it":[42,46,124],"implies,":[43],"or":[44],"how":[45],"unfolds":[47],"context.":[49],"I":[50,73],"propose":[51],"a":[52,59,104,119],"conceptual":[53],"reframing":[54],"of":[55,122],"as":[58],"layered,":[60],"situated":[61],"process":[62],"that":[63],"encompasses":[64],"perception,":[65],"reasoning,":[66],"interaction.":[68],"To":[69],"instantiate":[70],"this":[71],"view,":[72],"introduce":[74],"four":[75],"cognitively":[76],"inspired":[77],"task":[78],"paradigms-ASPIRE,":[79],"SODA,":[80],"AUX,":[81],"AUGMENT-those":[83],"structure":[84],"understanding":[86],"across":[87],"time-frequency":[88],"pattern":[89],"captioning,":[90],"hierarchical":[91],"event/scene":[92],"description,":[93],"causal":[94],"explanation,":[95],"goal-driven":[97],"interpretation,":[98],"respectively.":[99],"Together,":[100],"paradigms":[102],"provide":[103],"roadmap":[105],"toward":[106],"more":[107],"generalizable,":[108],"explainable,":[109],"human-aligned":[111],"intelligence,":[113],"are":[115],"intended":[116],"catalyze":[118],"broader":[120],"discussion":[121],"means":[125],"machines":[127],"understand":[129],"sound.":[130]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
