{"id":"https://openalex.org/W4226074103","doi":"https://doi.org/10.1109/asru51503.2021.9687969","title":"Topic Classification on Spoken Documents Using Deep Acoustic and Linguistic Features","display_name":"Topic Classification on Spoken Documents Using Deep Acoustic and Linguistic Features","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W4226074103","doi":"https://doi.org/10.1109/asru51503.2021.9687969"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9687969","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9687969","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100686720","display_name":"Tan Liu","orcid":"https://orcid.org/0000-0003-4535-0963"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tan Liu","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Laboratory for Speech and Language Information Processing,Hefei,China","National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Laboratory for Speech and Language Information Processing,Hefei,China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038369085","display_name":"Wu Guo","orcid":"https://orcid.org/0000-0002-3779-7944"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wu Guo","raw_affiliation_strings":["University of Science and Technology of China,National Engineering Laboratory for Speech and Language Information Processing,Hefei,China","National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,National Engineering Laboratory for Speech and Language Information Processing,Hefei,China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":"National Engineering Laboratory for Speech and Language Information Processing, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100686720"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.5026,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.68349224,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"427","last_page":"432"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8076480031013489},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6421844363212585},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6280160546302795},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6131340861320496},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5834180116653442},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.41654467582702637},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.41053932905197144},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3132433295249939}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8076480031013489},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6421844363212585},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6280160546302795},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6131340861320496},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5834180116653442},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.41654467582702637},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.41053932905197144},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3132433295249939}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru51503.2021.9687969","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9687969","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7300000190734863,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G6450856609","display_name":null,"funder_award_id":"U1836219","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1832693441","https://openalex.org/W2102113734","https://openalex.org/W2120615054","https://openalex.org/W2127141656","https://openalex.org/W2147768505","https://openalex.org/W2273396394","https://openalex.org/W2545177271","https://openalex.org/W2597757402","https://openalex.org/W2749514303","https://openalex.org/W2750499125","https://openalex.org/W2770260627","https://openalex.org/W2891229414","https://openalex.org/W2896457183","https://openalex.org/W2962961016","https://openalex.org/W2963211739","https://openalex.org/W2963288440","https://openalex.org/W2964074409","https://openalex.org/W2972463723","https://openalex.org/W2980708516","https://openalex.org/W3184998487","https://openalex.org/W6675365184","https://openalex.org/W6685380521","https://openalex.org/W6728910023","https://openalex.org/W6735390525","https://openalex.org/W6746393671","https://openalex.org/W6755207826","https://openalex.org/W6767636311","https://openalex.org/W6780226713"],"related_works":["https://openalex.org/W2044408773","https://openalex.org/W2130717063","https://openalex.org/W2981428355","https://openalex.org/W1834994814","https://openalex.org/W4383605217","https://openalex.org/W2041273198","https://openalex.org/W2533508831","https://openalex.org/W1599055764","https://openalex.org/W2742699532","https://openalex.org/W2131711534"],"abstract_inverted_index":{"Topic":[0],"classification":[1,26,59],"systems":[2,160],"on":[3,60,142],"spoken":[4,61,102],"documents":[5],"usually":[6],"consist":[7],"of":[8,42,49,82,101,134],"two":[9,132],"modules:":[10],"an":[11],"automatic":[12],"speech":[13,19],"recognition":[14],"(ASR)":[15],"module":[16,28,115,126],"to":[17,29,111,116,129],"convert":[18],"into":[20],"text":[21,24],"and":[22,52,79,161],"a":[23,65,112,122,143,163],"topic":[25,32,58,138],"(TTC)":[27],"predict":[30],"the":[31,35,44,47,80,83,86,91,97,155],"class":[33],"from":[34,146],"decoded":[36],"text.":[37],"In":[38],"this":[39],"paper,":[40],"instead":[41],"using":[43,71],"ASR":[45],"transcripts,":[46],"fusion":[48],"deep":[50,98,106,118,135],"acoustic":[51,68,99,107],"linguistic":[53,119],"features":[54,100,108,136],"is":[55,76,127],"used":[56,95],"for":[57,137],"documents.":[62,103],"More":[63],"specifically,":[64],"conventional":[66,156],"CTC-based":[67],"model":[69],"(AM)":[70],"phonemes":[72],"as":[73,96],"output":[74],"units":[75],"first":[77],"trained,":[78],"outputs":[81],"layer":[84],"before":[85],"linear":[87],"phoneme":[88],"classifier":[89],"in":[90,166],"trained":[92],"AM":[93],"are":[94,109],"Furthermore,":[104],"these":[105,131],"fed":[110],"phoneme-to-word":[113],"(P2W)":[114],"obtain":[117],"features.":[120],"Finally,":[121],"local":[123],"multi-head":[124],"attention":[125],"proposed":[128,152],"fuse":[130],"types":[133],"classification.":[139],"Experiments":[140],"conducted":[141],"subset":[144],"selected":[145],"Switchboard":[147],"corpus":[148],"show":[149],"that":[150],"our":[151],"framework":[153],"outperforms":[154],"<tex":[157],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[158],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\text{ASR}+\\text{TTC}$</tex>":[159],"achieves":[162],"3.13%":[164],"improvement":[165],"ACC.":[167]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
