{"id":"https://openalex.org/W2999652727","doi":"https://doi.org/10.1109/taslp.2020.2966868","title":"Pyramidal Temporal Pooling With Discriminative Mapping for Audio Classification","display_name":"Pyramidal Temporal Pooling With Discriminative Mapping for Audio Classification","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W2999652727","doi":"https://doi.org/10.1109/taslp.2020.2966868","mag":"2999652727"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2020.2966868","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2020.2966868","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100459582","display_name":"Liwen Zhang","orcid":"https://orcid.org/0000-0001-8457-2943"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liwen Zhang","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"],"raw_orcid":"https://orcid.org/0000-0001-8457-2943","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018232553","display_name":"Ziqiang Shi","orcid":"https://orcid.org/0000-0002-3105-6213"},"institutions":[{"id":"https://openalex.org/I4210159607","display_name":"Fujitsu (China)","ror":"https://ror.org/04w4yzw62","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210159607"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziqiang Shi","raw_affiliation_strings":["Information Technology Laboratory Fujitsu Research & Development Center Co., LTD., Chaoyang Disrict, China"],"raw_orcid":"https://orcid.org/0000-0002-3105-6213","affiliations":[{"raw_affiliation_string":"Information Technology Laboratory Fujitsu Research & Development Center Co., LTD., Chaoyang Disrict, China","institution_ids":["https://openalex.org/I4210159607"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101508830","display_name":"Jiqing Han","orcid":"https://orcid.org/0000-0002-4297-4300"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiqing Han","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"],"raw_orcid":"https://orcid.org/0000-0002-4297-4300","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100459582"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":2.583,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.90050238,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"28","issue":null,"first_page":"770","last_page":"784"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8518955707550049},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.8083767294883728},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.7050026059150696},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7002254128456116},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6749045848846436},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.6465386748313904},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5755950212478638},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.5490787029266357},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.5157935619354248},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.44870275259017944},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4382725954055786},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.27942854166030884}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8518955707550049},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.8083767294883728},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.7050026059150696},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7002254128456116},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6749045848846436},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.6465386748313904},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5755950212478638},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.5490787029266357},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.5157935619354248},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.44870275259017944},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4382725954055786},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.27942854166030884},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2020.2966868","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2020.2966868","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.7400000095367432}],"awards":[{"id":"https://openalex.org/G4159508535","display_name":null,"funder_award_id":"U1736210","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6725129794","display_name":null,"funder_award_id":"61471145","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":86,"referenced_works":["https://openalex.org/W81422161","https://openalex.org/W1522301498","https://openalex.org/W1533861849","https://openalex.org/W1565317727","https://openalex.org/W1591676560","https://openalex.org/W1607905124","https://openalex.org/W1686810756","https://openalex.org/W1822987211","https://openalex.org/W1836465849","https://openalex.org/W1904365287","https://openalex.org/W1926645898","https://openalex.org/W1950136256","https://openalex.org/W1970578576","https://openalex.org/W1979173241","https://openalex.org/W1979931042","https://openalex.org/W1993625798","https://openalex.org/W1994452307","https://openalex.org/W1994616650","https://openalex.org/W2017416504","https://openalex.org/W2033875152","https://openalex.org/W2036931953","https://openalex.org/W2048174296","https://openalex.org/W2052711563","https://openalex.org/W2055233384","https://openalex.org/W2072184010","https://openalex.org/W2079735306","https://openalex.org/W2086384421","https://openalex.org/W2095147901","https://openalex.org/W2097508275","https://openalex.org/W2102605133","https://openalex.org/W2109235804","https://openalex.org/W2109300274","https://openalex.org/W2119662931","https://openalex.org/W2124659975","https://openalex.org/W2130248603","https://openalex.org/W2131774270","https://openalex.org/W2137226992","https://openalex.org/W2137343183","https://openalex.org/W2141303268","https://openalex.org/W2155273149","https://openalex.org/W2158915909","https://openalex.org/W2165163503","https://openalex.org/W2168441989","https://openalex.org/W2187846082","https://openalex.org/W2193384753","https://openalex.org/W2198630576","https://openalex.org/W2341412280","https://openalex.org/W2400604769","https://openalex.org/W2408491073","https://openalex.org/W2481910659","https://openalex.org/W2570915410","https://openalex.org/W2717715803","https://openalex.org/W2723513260","https://openalex.org/W2735072998","https://openalex.org/W2759487567","https://openalex.org/W2768083292","https://openalex.org/W2774571176","https://openalex.org/W2775794021","https://openalex.org/W2806708446","https://openalex.org/W2883935097","https://openalex.org/W2889056836","https://openalex.org/W2900315145","https://openalex.org/W2949117887","https://openalex.org/W2963399116","https://openalex.org/W2963723765","https://openalex.org/W2964301993","https://openalex.org/W4295723153","https://openalex.org/W4301866506","https://openalex.org/W6631190155","https://openalex.org/W6631943919","https://openalex.org/W6636472049","https://openalex.org/W6637373629","https://openalex.org/W6638667902","https://openalex.org/W6640036494","https://openalex.org/W6674801095","https://openalex.org/W6680532697","https://openalex.org/W6683178862","https://openalex.org/W6684426735","https://openalex.org/W6686684655","https://openalex.org/W6713285685","https://openalex.org/W6713457403","https://openalex.org/W6714426239","https://openalex.org/W6745170695","https://openalex.org/W6746570019","https://openalex.org/W6752105567","https://openalex.org/W6752516136"],"related_works":["https://openalex.org/W3107204728","https://openalex.org/W3095152779","https://openalex.org/W3128220219","https://openalex.org/W3119773509","https://openalex.org/W3047363187","https://openalex.org/W3177373753","https://openalex.org/W3208297503","https://openalex.org/W2889153461","https://openalex.org/W2964117661","https://openalex.org/W4388405611"],"abstract_inverted_index":{"Audio":[0],"signals":[1],"are":[2],"temporally-structured":[3],"data,":[4],"and":[5,97,112,162,182,188,201],"learning":[6,27,105,184],"their":[7],"discriminative":[8,94,109,139],"representations":[9,111],"containing":[10],"temporal":[11,36,44,55,61,70,86,123,144],"information":[12,45,145],"is":[13,117],"crucial":[14],"for":[15,107],"the":[16,43,64,68,73,83,108,113,122,134,138,143,147,166,191,195,207,221],"audio":[17,25,49,110,152,196],"classification.":[18],"In":[19],"this":[20],"article,":[21],"we":[22,89],"propose":[23],"an":[24,47,78,151,155],"representation":[26,140],"method":[28,106],"with":[29,154,170,216],"a":[30,53,92,98,103,126,130,174],"hierarchical":[31],"pyramid":[32],"structure":[33],"called":[34,115],"pyramidal":[35],"pooling":[37,56,62,87],"(PTP)":[38],"which":[39],"aims":[40],"to":[41],"capture":[42,67],"of":[46,72,129,146],"entire":[48],"sample.":[50],"By":[51,120],"stacking":[52],"global":[54,85],"layer":[57],"on":[58,194,206],"multiple":[59],"local":[60],"layers,":[63],"PTP":[65,161,187],"can":[66,136,164,202],"high-level":[69],"dynamics":[71],"input":[74,167],"feature":[75,168],"sequence":[76,169],"in":[77,82,220],"unsupervised":[79],"way.":[80],"Furthermore,":[81],"top":[84],"layer,":[88],"jointly":[90],"optimize":[91],"learnable":[93],"mapping":[95],"(DM)":[96],"softmax":[99],"classifier.":[100],"Such":[101],"that,":[102],"joint":[104],"classifier":[114],"DM-PTP":[116,135,163,189],"also":[118],"presented.":[119],"treating":[121],"encoding":[124],"as":[125],"low-level":[127],"constraint":[128],"bi-level":[131],"optimization":[132],"problem,":[133],"produce":[137],"while":[141],"maintaining":[142],"whole":[148],"sequence.":[149],"For":[150],"sample":[153],"arbitrary":[156,171],"time":[157],"duration,":[158],"both":[159,186],"our":[160],"encode":[165],"length":[172],"into":[173],"fixed-length":[175],"representation.":[176],"Without":[177],"using":[178],"any":[179],"data":[180],"augmentation":[181],"ensemble":[183],"methods,":[185],"outperform":[190],"state-of-the-art":[192],"CNNs":[193],"event":[197],"recognition":[198],"(AER)":[199],"dataset,":[200],"achieve":[203],"comparable":[204],"performance":[205],"DCASE":[208],"2018":[209],"acoustic":[210],"scene":[211],"classification":[212],"(ASC)":[213],"dataset":[214],"compared":[215],"other":[217],"best":[218],"models":[219],"challenge.":[222]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
