{"id":"https://openalex.org/W3027732850","doi":"https://doi.org/10.1109/lsp.2020.2996085","title":"Learning Temporal Relations from Semantic Neighbors for Acoustic Scene Classification","display_name":"Learning Temporal Relations from Semantic Neighbors for Acoustic Scene Classification","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3027732850","doi":"https://doi.org/10.1109/lsp.2020.2996085","mag":"3027732850"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2020.2996085","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2020.2996085","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100459582","display_name":"Liwen Zhang","orcid":"https://orcid.org/0000-0001-8457-2943"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liwen Zhang","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"],"raw_orcid":"https://orcid.org/0000-0001-8457-2943","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101508830","display_name":"Jiqing Han","orcid":"https://orcid.org/0000-0002-4297-4300"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiqing Han","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China"],"raw_orcid":"https://orcid.org/0000-0002-4297-4300","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018232553","display_name":"Ziqiang Shi","orcid":"https://orcid.org/0000-0002-3105-6213"},"institutions":[{"id":"https://openalex.org/I4210159607","display_name":"Fujitsu (China)","ror":"https://ror.org/04w4yzw62","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210159607"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziqiang Shi","raw_affiliation_strings":["Information Technology Lab, Fujitsu Research & Development Center Coompany, Ltd., Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-3105-6213","affiliations":[{"raw_affiliation_string":"Information Technology Lab, Fujitsu Research & Development Center Coompany, Ltd., Beijing, China","institution_ids":["https://openalex.org/I4210159607"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100459582"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":3.6466,"has_fulltext":false,"cited_by_count":34,"citation_normalized_percentile":{"value":0.93801788,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"27","issue":null,"first_page":"950","last_page":"954"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7579704523086548},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6976791024208069},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.6946925520896912},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6512953042984009},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6360776424407959},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.6229208707809448},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5191404223442078},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.4626854658126831},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.43993204832077026},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3475295603275299},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11867371201515198},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.11108466982841492}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7579704523086548},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6976791024208069},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.6946925520896912},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6512953042984009},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6360776424407959},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.6229208707809448},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5191404223442078},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.4626854658126831},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.43993204832077026},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3475295603275299},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11867371201515198},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.11108466982841492},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2020.2996085","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2020.2996085","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4159508535","display_name":null,"funder_award_id":"U1736210","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6725129794","display_name":null,"funder_award_id":"61471145","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W81422161","https://openalex.org/W1686810756","https://openalex.org/W2068849277","https://openalex.org/W2072184010","https://openalex.org/W2137226992","https://openalex.org/W2194775991","https://openalex.org/W2296573765","https://openalex.org/W2302255633","https://openalex.org/W2560609797","https://openalex.org/W2619932295","https://openalex.org/W2775505379","https://openalex.org/W2775794021","https://openalex.org/W2883935097","https://openalex.org/W2901459357","https://openalex.org/W2902755038","https://openalex.org/W2921763313","https://openalex.org/W2936774411","https://openalex.org/W2937299048","https://openalex.org/W2962711843","https://openalex.org/W2962835968","https://openalex.org/W2963903710","https://openalex.org/W2972548957","https://openalex.org/W2979750740","https://openalex.org/W2980792518","https://openalex.org/W2982346427","https://openalex.org/W2982625229","https://openalex.org/W2982683041","https://openalex.org/W2999652727","https://openalex.org/W4289288178","https://openalex.org/W4295723153","https://openalex.org/W6637373629","https://openalex.org/W6680532697","https://openalex.org/W6697668227","https://openalex.org/W6752516136","https://openalex.org/W6755726434","https://openalex.org/W6757096825"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2011227383","https://openalex.org/W2088854863","https://openalex.org/W4402568167","https://openalex.org/W3179495260","https://openalex.org/W1976719989","https://openalex.org/W3127543252","https://openalex.org/W2990636717","https://openalex.org/W2972212393"],"abstract_inverted_index":{"Convolutional":[0,58],"networks":[1],"have":[2],"achieved":[3],"the":[4,13,20,39,88,99,104,109,120,127,141],"state-of-the-art":[5,142],"performance":[6],"on":[7,145],"Acoustic":[8],"Scene":[9],"Classification":[10],"(ASC).":[11],"Given":[12],"Log":[14],"Mel-Spectrogram":[15],"of":[16,79,140],"an":[17,55,92],"audio":[18,70],"sample,":[19],"network":[21],"can":[22,67,102],"extract":[23],"useful":[24],"semantic":[25,77,105,110,133],"contents":[26],"in":[27,126],"a":[28],"certain":[29],"range":[30],"receptive":[31,44,81],"field":[32],"by":[33,72],"stacking":[34],"local":[35],"convolutional":[36],"operations.":[37],"However,":[38],"temporal":[40,74],"relations":[41,75,121],"between":[42,122],"different":[43,80],"fields":[45,82],"are":[46],"not":[47],"captured":[48],"explicitly.":[49],"In":[50],"this":[51],"letter,":[52],"we":[53],"propose":[54],"end-to-end":[56],"3D":[57],"Neural":[59],"Network":[60],"(CNN)":[61],"for":[62],"ASC,":[63],"named":[64],"SeNoT-Net,":[65],"which":[66],"generate":[68],"effective":[69],"representations":[71],"capturing":[73],"from":[76],"neighbors":[78,111],"over":[83,112],"time.":[84],"The":[85,135],"SeNoT-Net":[86,137],"treats":[87],"Log-Mel":[89],"spectrogram":[90],"as":[91],"ordered":[93],"segment-level":[94],"sequence.":[95],"For":[96],"each":[97,123],"segment,":[98],"residual":[100],"block":[101],"produce":[103],"feature":[106,124,128],"maps,":[107],"then":[108],"time":[113],"(SeNoT)":[114],"module":[115],"is":[116],"applied":[117],"to":[118],"capture":[119],"point":[125],"maps":[129],"and":[130,149],"its":[131],"top-k":[132],"neighbors.":[134],"proposed":[136],"outperforms":[138],"most":[139],"CNN":[143],"models":[144],"both":[146],"DCASE":[147],"2018":[148],"2019":[150],"ASC":[151],"datasets.":[152]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
