{"id":"https://openalex.org/W3095902331","doi":"https://doi.org/10.21437/interspeech.2020-1763","title":"An Audio-Enriched BERT-Based Framework for Spoken Multiple-Choice Question Answering","display_name":"An Audio-Enriched BERT-Based Framework for Spoken Multiple-Choice Question Answering","publication_year":2020,"publication_date":"2020-10-25","ids":{"openalex":"https://openalex.org/W3095902331","doi":"https://doi.org/10.21437/interspeech.2020-1763","mag":"3095902331"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2020-1763","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1763","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102421526","display_name":"Chia-Chih Kuo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chia-Chih Kuo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067529162","display_name":"Shang-Bao Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shang-Bao Luo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5115603153","display_name":"Kuan\u2010Yu Chen","orcid":"https://orcid.org/0000-0002-6036-2199"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kuan-Yu Chen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102421526"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.5907,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.87043934,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9825999736785889,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7785041332244873},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.5610511302947998},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.42146530747413635},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3634408116340637},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3355402946472168}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7785041332244873},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.5610511302947998},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.42146530747413635},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3634408116340637},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3355402946472168}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2020-1763","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-1763","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W3208425359","https://openalex.org/W2349927912","https://openalex.org/W3192589309"],"abstract_inverted_index":{"In":[0],"a":[1,9,11,78,126,180],"spoken":[2],"multiple-choice":[3],"question":[4],"answering":[5],"(SMCQA)":[6],"task,":[7],"given":[8],"passage,":[10],"question,":[12],"and":[13,176],"multiple":[14],"choices":[15],"all":[16],"in":[17,50,106,171],"the":[18,22,27,32,35,45,55,63,97,112,117,134,145,153],"form":[19],"of":[20,81,119,136,166],"speech,":[21],"machine":[23],"needs":[24],"to":[25,30,54],"pick":[26],"correct":[28],"choice":[29],"answer":[31],"question.":[33],"While":[34],"audio":[36,151],"could":[37],"contain":[38],"useful":[39],"cues":[40],"for":[41],"SMCQA,":[42],"usually":[43],"only":[44,72,132],"auto-transcribed":[46,73],"text":[47,74,93],"is":[48,162],"utilized":[49],"system":[51,114],"development.":[52],"Thanks":[53],"large-scaled":[56],"pre-trained":[57],"language":[58,138],"representation":[59,103],"models,":[60],"such":[61],"as":[62],"bidirectional":[64],"encoder":[65],"representations":[66,139],"from":[67,150],"transformers":[68],"(BERT),":[69],"systems":[70,101,178],"with":[71,152],"can":[75,91],"still":[76],"achieve":[77],"certain":[79],"level":[80],"performance.":[82],"However,":[83],"previous":[84],"studies":[85],"have":[86],"evidenced":[87],"that":[88],"acoustic-level":[89,147],"statistics":[90],"offset":[92],"inaccuracies":[94],"caused":[95],"by":[96,141],"automatic":[98],"speech":[99],"recognition":[100],"or":[102],"inadequacy":[104],"lurking":[105],"word":[107],"embedding":[108],"generators,":[109],"thereby":[110],"making":[111],"SMCQA":[113,128,160,183],"robust.":[115],"Along":[116],"line":[118],"research,":[120],"this":[121],"study":[122],"concentrates":[123],"on":[124,179],"designing":[125],"BERT-based":[127,159],"framework,":[129],"which":[130],"not":[131],"inherits":[133],"advantages":[135],"contextualized":[137],"learned":[140],"BERT,":[142],"but":[143],"integrates":[144],"complementary":[146],"information":[148],"distilled":[149],"text-level":[154],"information.":[155],"Consequently,":[156],"an":[157],"audio-enriched":[158],"framework":[161],"proposed.":[163],"A":[164],"series":[165],"experiments":[167],"demonstrates":[168],"remarkable":[169],"improvements":[170],"accuracy":[172],"over":[173],"selected":[174],"baselines":[175],"SOTA":[177],"published":[181],"Chinese":[182],"dataset.":[184]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
