{"id":"https://openalex.org/W4409536790","doi":"https://doi.org/10.1109/icvisp64524.2024.10959642","title":"Multimodal Speech Recognition Assisted by Slide Information in Classroom Scenes","display_name":"Multimodal Speech Recognition Assisted by Slide Information in Classroom Scenes","publication_year":2024,"publication_date":"2024-12-27","ids":{"openalex":"https://openalex.org/W4409536790","doi":"https://doi.org/10.1109/icvisp64524.2024.10959642"},"language":"en","primary_location":{"id":"doi:10.1109/icvisp64524.2024.10959642","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icvisp64524.2024.10959642","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 8th International Conference on Vision, Image and Signal Processing (ICVISP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100650906","display_name":"Jinkai Li","orcid":"https://orcid.org/0000-0002-9056-5333"},"institutions":[{"id":"https://openalex.org/I120825670","display_name":"Yunnan Normal University","ror":"https://ror.org/00sc9n023","country_code":"CN","type":"education","lineage":["https://openalex.org/I120825670"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jingen Li","raw_affiliation_strings":["Yunnan Normal University,Key Laboratory of Education Informatization for Nationalities,Kunming,China"],"affiliations":[{"raw_affiliation_string":"Yunnan Normal University,Key Laboratory of Education Informatization for Nationalities,Kunming,China","institution_ids":["https://openalex.org/I120825670"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083400938","display_name":"Jiatian Mei","orcid":null},"institutions":[{"id":"https://openalex.org/I120825670","display_name":"Yunnan Normal University","ror":"https://ror.org/00sc9n023","country_code":"CN","type":"education","lineage":["https://openalex.org/I120825670"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiatian Mei","raw_affiliation_strings":["Yunnan Normal University,Key Laboratory of Education, Informatization for Nationalities &#x0026; Yunnan Key Laboratory of Smart Education,Kunming,China"],"affiliations":[{"raw_affiliation_string":"Yunnan Normal University,Key Laboratory of Education, Informatization for Nationalities &#x0026; Yunnan Key Laboratory of Smart Education,Kunming,China","institution_ids":["https://openalex.org/I120825670"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022267948","display_name":"Di Wu","orcid":"https://orcid.org/0000-0001-7419-9903"},"institutions":[{"id":"https://openalex.org/I120825670","display_name":"Yunnan Normal University","ror":"https://ror.org/00sc9n023","country_code":"CN","type":"education","lineage":["https://openalex.org/I120825670"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Di Wu","raw_affiliation_strings":["Yunnan Normal University,Key Laboratory of Education, Informatization for Nationalities &#x0026; Yunnan Key Laboratory of Smart Education,Kunming,China"],"affiliations":[{"raw_affiliation_string":"Yunnan Normal University,Key Laboratory of Education, Informatization for Nationalities &#x0026; Yunnan Key Laboratory of Smart Education,Kunming,China","institution_ids":["https://openalex.org/I120825670"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005596954","display_name":"Mingtao Zhou","orcid":"https://orcid.org/0009-0004-8562-0061"},"institutions":[{"id":"https://openalex.org/I120825670","display_name":"Yunnan Normal University","ror":"https://ror.org/00sc9n023","country_code":"CN","type":"education","lineage":["https://openalex.org/I120825670"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingtao Zhou","raw_affiliation_strings":["Yunnan Normal University,Key Laboratory of Education, Informatization for Nationalities &#x0026; Yunnan Key Laboratory of Smart Education,Kunming,China"],"affiliations":[{"raw_affiliation_string":"Yunnan Normal University,Key Laboratory of Education, Informatization for Nationalities &#x0026; Yunnan Key Laboratory of Smart Education,Kunming,China","institution_ids":["https://openalex.org/I120825670"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100674244","display_name":"Lin Jiang","orcid":"https://orcid.org/0000-0003-3360-0187"},"institutions":[{"id":"https://openalex.org/I120825670","display_name":"Yunnan Normal University","ror":"https://ror.org/00sc9n023","country_code":"CN","type":"education","lineage":["https://openalex.org/I120825670"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Jiang","raw_affiliation_strings":["Yunnan Normal University,Key Laboratory of Education Informatization for Nationalities,Kunming,China"],"affiliations":[{"raw_affiliation_string":"Yunnan Normal University,Key Laboratory of Education Informatization for Nationalities,Kunming,China","institution_ids":["https://openalex.org/I120825670"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100650906"],"corresponding_institution_ids":["https://openalex.org/I120825670"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.34063286,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.980400025844574,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.980400025844574,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.972000002861023,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.929099977016449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7646737098693848},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5246474742889404},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4167454242706299},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.3372515141963959}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7646737098693848},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5246474742889404},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4167454242706299},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3372515141963959}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icvisp64524.2024.10959642","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icvisp64524.2024.10959642","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 8th International Conference on Vision, Image and Signal Processing (ICVISP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5357890769","display_name":null,"funder_award_id":"62166050","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1506201762","https://openalex.org/W1989556229","https://openalex.org/W2043331815","https://openalex.org/W2808631503","https://openalex.org/W2886319145","https://openalex.org/W2963242190","https://openalex.org/W2984008963","https://openalex.org/W3097777922","https://openalex.org/W3198533616","https://openalex.org/W4225985539","https://openalex.org/W4283067311","https://openalex.org/W4285074441","https://openalex.org/W4320494062","https://openalex.org/W4375868850","https://openalex.org/W4385823274","https://openalex.org/W4392909899","https://openalex.org/W4402111392","https://openalex.org/W6754420807","https://openalex.org/W6754473786","https://openalex.org/W6755559483","https://openalex.org/W6810176866","https://openalex.org/W6851624667"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Multimodal":[0],"automatic":[1],"speech":[2,16,57,80,94,190,200],"recognition":[3,17,58,81,95,136,191,201],"(ASR)":[4],"technology":[5,96],"has":[6,46],"attracted":[7],"much":[8],"attention":[9],"because":[10],"it":[11],"improves":[12],"the":[13,26,89,104,116,123,128,135,161,197],"accuracy":[14,198],"of":[15,25,37,56,70,91,107,118,137,147,199,215],"by":[18,206],"adding":[19],"other":[20],"modal":[21,133,186],"information.":[22,210],"However,":[23],"most":[24],"existing":[27,92],"applications":[28],"are":[29],"in":[30,63,97,115,127,182],"English":[31],"and":[32,35,59,73,110,139,153,178,213],"conference":[33],"scenarios,":[34],"lack":[36],"Chinese":[38,98,108,119],"application":[39],"scenario":[40],"task":[41],"corpus.":[42],"Online":[43],"course":[44],"learning":[45,50],"become":[47],"a":[48,54,68],"mainstream":[49,175],"method":[51,217],"that":[52,196],"requires":[53],"lot":[55,69],"transcription.":[60],"Presentation":[61],"slides":[62,183],"instructional":[64],"videos":[65,150],"often":[66],"contain":[67],"relevant":[71],"information":[72,181],"can":[74,202],"be":[75,203],"an":[76,112,166,219],"important":[77],"adjunct":[78],"to":[79,85,188],"tasks.":[82,192],"In":[83],"order":[84],"make":[86],"up":[87],"for":[88,170,222],"shortcomings":[90],"multimodal":[93],"scenes,":[99],"this":[100,163,216],"paper":[101,164],"focuses":[102],"on":[103,160,174],"teaching":[105,157],"scenes":[106],"teachers":[109],"constructs":[111,165],"audio-visual":[113],"corpus":[114,145],"classroom":[117,172],"teachers.":[120],"By":[121],"introducing":[122],"technical":[124],"term":[125],"text":[126,180,209],"slide":[129,208],"show":[130,195],"as":[131,184],"additional":[132,185],"input,":[134],"homophones":[138],"proprietary":[140],"terms":[141],"is":[142],"improved.":[143],"The":[144,211],"consists":[146],"424":[148],"teacher":[149,171],"covering":[151],"primary":[152],"junior":[154],"high":[155],"school":[156],"videos.":[158],"Based":[159],"corpus,":[162],"ASR":[167],"benchmark":[168],"pipeline":[169],"based":[173],"Paraformer":[176],"architecture,":[177],"uses":[179],"input":[187],"implement":[189],"Experimental":[193],"results":[194],"improved":[204],"significantly":[205],"using":[207],"feasibility":[212],"effectiveness":[214],"provide":[218],"effective":[220],"solution":[221],"intelligent":[223],"education":[224],"field.":[225]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
