{"id":"https://openalex.org/W4409084572","doi":"https://doi.org/10.32604/cmc.2025.061920","title":"End-to-End Audio Pattern Recognition Network for Overcoming Feature Limitations in Human-Machine Interaction","display_name":"End-to-End Audio Pattern Recognition Network for Overcoming Feature Limitations in Human-Machine Interaction","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4409084572","doi":"https://doi.org/10.32604/cmc.2025.061920"},"language":"en","primary_location":{"id":"doi:10.32604/cmc.2025.061920","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.061920","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.32604/cmc.2025.061920","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030948239","display_name":"Zijian Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zijian Sun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100697596","display_name":"Yaqian Li","orcid":"https://orcid.org/0000-0002-3541-2836"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yaqian Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091186772","display_name":"Haoran Liu","orcid":"https://orcid.org/0000-0002-2507-2540"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haoran Liu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100776607","display_name":"Haibin Li","orcid":"https://orcid.org/0000-0003-0815-594X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haibin Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5112845493","display_name":"Wenming Zhang","orcid":"https://orcid.org/0000-0002-6203-9491"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wenming Zhang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5030948239"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.6661,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.80648717,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"83","issue":"2","first_page":"3187","last_page":"3210"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9789000153541565,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9789000153541565,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6834380626678467},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6359696388244629},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5424436330795288},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.5185775756835938},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.469646155834198},{"id":"https://openalex.org/keywords/human-interaction","display_name":"Human interaction","score":0.466152548789978},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4567001163959503},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.278242826461792}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6834380626678467},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6359696388244629},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5424436330795288},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.5185775756835938},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.469646155834198},{"id":"https://openalex.org/C2987082051","wikidata":"https://www.wikidata.org/wiki/Q223642","display_name":"Human interaction","level":2,"score":0.466152548789978},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4567001163959503},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.278242826461792},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.32604/cmc.2025.061920","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.061920","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.32604/cmc.2025.061920","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.061920","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1978274651","https://openalex.org/W2030931454","https://openalex.org/W2034329102","https://openalex.org/W2038484192","https://openalex.org/W2064675550","https://openalex.org/W2148154194","https://openalex.org/W2149940198","https://openalex.org/W2310360015","https://openalex.org/W2593451766","https://openalex.org/W2676925568","https://openalex.org/W2803193013","https://openalex.org/W2962785008","https://openalex.org/W2963838685","https://openalex.org/W2987861506","https://openalex.org/W3082878506","https://openalex.org/W3143174503","https://openalex.org/W3158553615","https://openalex.org/W3161026535","https://openalex.org/W3163091219","https://openalex.org/W3172353912","https://openalex.org/W3173311612","https://openalex.org/W3198908807","https://openalex.org/W3207382412","https://openalex.org/W3207699776","https://openalex.org/W3213455868","https://openalex.org/W3213879871","https://openalex.org/W4205633160","https://openalex.org/W4210849290","https://openalex.org/W4220894258","https://openalex.org/W4221046698","https://openalex.org/W4226442948","https://openalex.org/W4295308310","https://openalex.org/W4295957212","https://openalex.org/W4311765953","https://openalex.org/W4317470305","https://openalex.org/W4380987202","https://openalex.org/W4383273343","https://openalex.org/W4385429141","https://openalex.org/W4387101372","https://openalex.org/W4388303844","https://openalex.org/W4390226024","https://openalex.org/W4390918505","https://openalex.org/W4391409096","https://openalex.org/W4393182613","https://openalex.org/W4393558743","https://openalex.org/W4401610853","https://openalex.org/W4401731606","https://openalex.org/W4402334360"],"related_works":["https://openalex.org/W2151749779","https://openalex.org/W3179968364","https://openalex.org/W1999612375","https://openalex.org/W2938107654","https://openalex.org/W3196421258","https://openalex.org/W4387301579","https://openalex.org/W2763956190","https://openalex.org/W3008587939","https://openalex.org/W4405522771","https://openalex.org/W4404782863"],"abstract_inverted_index":{"In":[0],"recent":[1],"years,":[2],"audio":[3,56,64,117,122,229],"pattern":[4,57],"recognition":[5,58],"has":[6],"emerged":[7],"as":[8,33,213],"a":[9,78,81,92,102,113],"key":[10],"area":[11],"of":[12,116,158,185,199],"research,":[13],"driven":[14],"by":[15],"its":[16],"applications":[17],"in":[18,162],"human-computer":[19],"interaction,":[20],"robotics,":[21],"and":[22,41,69,88,91,141,180,189,197,216,225,231],"healthcare.":[23],"Traditional":[24],"methods,":[25],"which":[26,203],"rely":[27],"heavily":[28],"on":[29,173],"handcrafted":[30],"features":[31,108,153],"such":[32,212],"Mel":[34],"filters,":[35],"often":[36],"suffer":[37],"from":[38,109],"information":[39,68,214],"loss":[40,215],"limited":[42,217],"feature":[43,218],"representation":[44,115],"capabilities.":[45],"To":[46],"address":[47],"these":[48,152],"limitations,":[49],"this":[50,220],"study":[51],"proposes":[52],"an":[53],"innovative":[54],"end-to-end":[55],"framework":[59,76],"that":[60,85,96],"directly":[61],"processes":[62],"raw":[63],"signals,":[65],"preserving":[66],"original":[67],"extracting":[70],"effective":[71],"classification":[72,164,183,230],"features.":[73],"The":[74,147],"proposed":[75,201],"utilizes":[77],"dual-branch":[79],"architecture:":[80],"global":[82,135],"refinement":[83,136],"module":[84,95,105,125,137,150],"retains":[86],"channel":[87,140],"temporal":[89,142],"details":[90],"multi-scale":[93,121,139],"embedding":[94,124],"captures":[97],"high-level":[98],"semantic":[99],"information.":[100],"Additionally,":[101],"guided":[103,148],"fusion":[104,149],"integrates":[106],"complementary":[107,159],"both":[110],"branches,":[111],"ensuring":[112],"comprehensive":[114],"data.":[118],"Specifically,":[119],"the":[120,134,169,195,200],"context":[123],"is":[126],"designed":[127],"to":[128,154],"effectively":[129],"extract":[130],"spatiotemporal":[131],"dependencies,":[132],"while":[133],"aggregates":[138],"cues":[143],"for":[144,227],"enhanced":[145],"modeling.":[146],"leverages":[151],"achieve":[155],"efficient":[156],"integration":[157],"information,":[160],"resulting":[161],"improved":[163],"accuracy.":[165],"Experimental":[166],"results":[167,193],"demonstrate":[168],"model\u2019s":[170],"superior":[171],"performance":[172],"multiple":[174],"datasets,":[175],"including":[176],"ESC-50,":[177],"UrbanSound8K,":[178],"RAVDESS,":[179],"CREMA-D,":[181],"with":[182],"accuracies":[184],"93.25%,":[186],"90.91%,":[187],"92.36%,":[188],"70.50%,":[190],"respectively.":[191],"These":[192],"highlight":[194],"robustness":[196],"effectiveness":[198],"framework,":[202],"significantly":[204],"outperforms":[205],"existing":[206],"approaches.":[207],"By":[208],"addressing":[209],"critical":[210],"challenges":[211],"representation,":[219],"work":[221],"provides":[222],"new":[223],"insights":[224],"methodologies":[226],"advancing":[228],"multimodal":[232],"interaction":[233],"systems.":[234]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-04T07:04:00.330322","created_date":"2025-10-10T00:00:00"}
