{"id":"https://openalex.org/W3035762612","doi":"https://doi.org/10.1109/icme46284.2020.9102952","title":"Residual Attention Based Network for Automatic Classification of Phonation Modes","display_name":"Residual Attention Based Network for Automatic Classification of Phonation Modes","publication_year":2020,"publication_date":"2020-06-09","ids":{"openalex":"https://openalex.org/W3035762612","doi":"https://doi.org/10.1109/icme46284.2020.9102952","mag":"3035762612"},"language":"en","primary_location":{"id":"doi:10.1109/icme46284.2020.9102952","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme46284.2020.9102952","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064077063","display_name":"Xiaoheng Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoheng Sun","raw_affiliation_strings":["School of Computer Science and Technology, Fudan University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108122621","display_name":"Yiliang Jiang","orcid":"https://orcid.org/0000-0002-2367-5997"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiliang Jiang","raw_affiliation_strings":["School of Computer Science and Technology, Fudan University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100318193","display_name":"Wei Li","orcid":"https://orcid.org/0000-0002-4486-8341"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Li","raw_affiliation_strings":["School of Computer Science and Technology, Fudan University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7604,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.70649622,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/phonation","display_name":"Phonation","score":0.9392983913421631},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6405291557312012},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6300227046012878},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.616815447807312},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5705925822257996},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5653690099716187},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5282313823699951},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5077973008155823},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4978477954864502},{"id":"https://openalex.org/keywords/breathy-voice","display_name":"Breathy voice","score":0.4626771807670593},{"id":"https://openalex.org/keywords/mode","display_name":"Mode (computer interface)","score":0.44935277104377747},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.14030024409294128}],"concepts":[{"id":"https://openalex.org/C173988693","wikidata":"https://www.wikidata.org/wiki/Q678132","display_name":"Phonation","level":2,"score":0.9392983913421631},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6405291557312012},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6300227046012878},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.616815447807312},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5705925822257996},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5653690099716187},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5282313823699951},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5077973008155823},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4978477954864502},{"id":"https://openalex.org/C72453395","wikidata":"https://www.wikidata.org/wiki/Q3328163","display_name":"Breathy voice","level":3,"score":0.4626771807670593},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.44935277104377747},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.14030024409294128},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C548259974","wikidata":"https://www.wikidata.org/wiki/Q569965","display_name":"Audiology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme46284.2020.9102952","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme46284.2020.9102952","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7599999904632568,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W30609886","https://openalex.org/W276227194","https://openalex.org/W1987280716","https://openalex.org/W2009955221","https://openalex.org/W2013374607","https://openalex.org/W2024590593","https://openalex.org/W2051457717","https://openalex.org/W2099773255","https://openalex.org/W2109020278","https://openalex.org/W2157177661","https://openalex.org/W2295471129","https://openalex.org/W2437181147","https://openalex.org/W2508726376","https://openalex.org/W2579593485","https://openalex.org/W2889017834","https://openalex.org/W2889118666","https://openalex.org/W2962858109","https://openalex.org/W2963495494","https://openalex.org/W6601242171","https://openalex.org/W6697155323","https://openalex.org/W6731940827"],"related_works":["https://openalex.org/W2092666606","https://openalex.org/W1980838809","https://openalex.org/W3129442396","https://openalex.org/W4229676021","https://openalex.org/W4244195428","https://openalex.org/W2030224518","https://openalex.org/W4214654144","https://openalex.org/W4248970328","https://openalex.org/W2053621666","https://openalex.org/W3023309240"],"abstract_inverted_index":{"Phonation":[0],"mode":[1],"is":[2,74,131],"an":[3,12],"essential":[4],"characteristic":[5],"of":[6,15,51,63,79,85,118],"singing":[7],"style":[8],"as":[9,11],"well":[10],"important":[13],"expression":[14],"performance.":[16],"It":[17],"can":[18],"be":[19],"classified":[20],"into":[21],"four":[22,120],"categories,":[23],"called":[24],"neutral,":[25],"breathy,":[26],"pressed":[27],"and":[28,36,92],"flow.":[29],"Previous":[30],"studies":[31],"used":[32],"voice":[33],"quality":[34],"features":[35],"feature":[37,90],"engineering":[38],"for":[39,76],"classification.":[40],"While":[41],"deep":[42],"learning":[43],"has":[44],"achieved":[45],"significant":[46],"progress":[47],"in":[48,60,116],"other":[49],"fields":[50],"music":[52],"information":[53],"retrieval":[54],"(MIR),":[55],"there":[56],"are":[57],"few":[58],"attempts":[59],"the":[61,98,108,119,127,136],"classification":[62,78,129],"phonation":[64,80],"modes.":[65,81],"In":[66,105],"this":[67],"study,":[68],"a":[69,86,93,102],"Residual":[70],"Attention":[71],"based":[72],"network":[73,83,88,99,112],"proposed":[75,111],"automatic":[77],"The":[82],"consists":[84],"convolutional":[87],"performing":[89],"processing":[91],"soft":[94],"mask":[95],"branch":[96],"enabling":[97],"focus":[100],"on":[101],"specific":[103],"area.":[104],"comparison":[106],"experiments,":[107],"models":[109],"with":[110],"achieve":[113],"better":[114],"results":[115],"three":[117],"datasets":[121],"than":[122,135],"previous":[123],"works,":[124],"among":[125],"which":[126],"highest":[128],"accuracy":[130],"94.58%,":[132],"2.29%":[133],"higher":[134],"baseline.":[137]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2021,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
