{"id":"https://openalex.org/W4312836468","doi":"https://doi.org/10.1145/3561613.3561633","title":"Research on Classroom Interaction Behavior Analysis Algorithm based on Audio and Video","display_name":"Research on Classroom Interaction Behavior Analysis Algorithm based on Audio and Video","publication_year":2022,"publication_date":"2022-08-19","ids":{"openalex":"https://openalex.org/W4312836468","doi":"https://doi.org/10.1145/3561613.3561633"},"language":"en","primary_location":{"id":"doi:10.1145/3561613.3561633","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3561613.3561633","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 The 5th International Conference on Control and Computer Vision","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051079756","display_name":"Zhiwei Zheng","orcid":"https://orcid.org/0009-0002-6977-1770"},"institutions":[{"id":"https://openalex.org/I162930524","display_name":"China University of Labor Relations","ror":"https://ror.org/0391zw446","country_code":"CN","type":"education","lineage":["https://openalex.org/I162930524"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhiwei Zheng","raw_affiliation_strings":["China University of Labor Relations, China"],"affiliations":[{"raw_affiliation_string":"China University of Labor Relations, China","institution_ids":["https://openalex.org/I162930524"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101879953","display_name":"Yuting Huang","orcid":"https://orcid.org/0000-0002-1968-6247"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuting Huang","raw_affiliation_strings":["Beijing Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5051079756"],"corresponding_institution_ids":["https://openalex.org/I162930524"],"apc_list":null,"apc_paid":null,"fwci":0.1007,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.40742466,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"127","last_page":"133"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.8960999846458435,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.8960999846458435,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.8845000267028809,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.8600999712944031,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7526233196258545},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5036560893058777},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.47331273555755615},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4632182717323303},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.45729222893714905},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.44987478852272034},{"id":"https://openalex.org/keywords/audio-analyzer","display_name":"Audio analyzer","score":0.44566407799720764},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.41178175806999207},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.36535143852233887},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3422483801841736},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.24712374806404114},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.20599055290222168},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.1267581284046173}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7526233196258545},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5036560893058777},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.47331273555755615},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4632182717323303},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.45729222893714905},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.44987478852272034},{"id":"https://openalex.org/C160372630","wikidata":"https://www.wikidata.org/wiki/Q4819855","display_name":"Audio analyzer","level":5,"score":0.44566407799720764},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.41178175806999207},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.36535143852233887},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3422483801841736},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.24712374806404114},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.20599055290222168},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.1267581284046173},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3561613.3561633","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3561613.3561633","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 The 5th International Conference on Control and Computer Vision","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8500000238418579,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2945883582","https://openalex.org/W2997747012","https://openalex.org/W3034552680","https://openalex.org/W3162241172","https://openalex.org/W3200992602"],"related_works":["https://openalex.org/W2953234277","https://openalex.org/W2626256601","https://openalex.org/W147410782","https://openalex.org/W2900413183","https://openalex.org/W4390975304","https://openalex.org/W3022252430","https://openalex.org/W4287804464","https://openalex.org/W3103989898","https://openalex.org/W3211292372","https://openalex.org/W803346624"],"abstract_inverted_index":{"Classroom":[0],"interaction":[1,21,84,110,132,145,161,180,198,213],"behavior":[2,22,111,133,199,214,288],"research":[3,23],"is":[4,24,140,217,297],"an":[5,60,195],"important":[6],"part":[7],"of":[8,31,45,49,65,73,89,108,122,177,185,274],"classroom":[9,20,54,83,109,123,131,144,160,179,189,197,212,286],"teaching":[10,17,67,90,100],"quality":[11,190],"evaluation,":[12,191],"which":[13],"can":[14,39,57,85],"effectively":[15],"improve":[16],"quality.":[18,68],"Traditional":[19],"mainly":[25],"carried":[26],"out":[27],"in":[28,52,70,112,209,283],"the":[29,42,46,53,66,71,99,106,174,183,210,236,245,250,257,271,279,284,321,325,330,337],"form":[30],"expert":[32],"lectures":[33],"and":[34,62,81,92,95,114,125,138,149,152,166,182,206,219,227,229,267,299,311,329,339,348],"student":[35],"questionnaires.":[36],"This":[37,102],"method":[38],"neither":[40],"make":[41,86,119],"best":[43],"use":[44,88,121],"large":[47],"amount":[48],"data":[50,91],"generated":[51],"scene,":[55],"nor":[56],"it":[58],"provide":[59,93],"objective":[61,96],"detailed":[63],"evaluation":[64,275],"However,":[69],"context":[72],"educational":[74],"informatization,":[75],"using":[76],"information":[77],"technology":[78,156],"to":[79,118,157],"observe":[80],"analyze":[82],"full":[87,120],"timely":[94],"feedback":[97],"on":[98,105,136,235,270,336],"situation.":[101],"paper":[103,193],"focuses":[104],"analysis":[107,134,181,200,215,289],"colleges":[113],"universities.":[115],"In":[116],"order":[117],"audio":[124,137],"video":[126,139],"data,":[127],"a":[128,220,230,291,300,306,312],"framework":[129],"for":[130,188],"based":[135,234],"constructed.":[141],"It":[142],"divides":[143],"behaviors":[146],"into":[147],"verbal":[148,211],"non-verbal":[150,285],"categories,":[151],"uses":[153],"deep":[154],"learning":[155],"realize":[158],"automated":[159],"analysis.":[162],"The":[163,203],"main":[164],"work":[165],"innovations":[167],"are":[168,241,316],"as":[169],"follows:":[170],"(1)":[171],"Combined":[172],"with":[173,244,319],"theoretical":[175],"basis":[176],"traditional":[178],"requirements":[184],"efficient":[186],"classrooms":[187],"this":[192],"constructs":[194],"audio-video-based":[196],"framework.":[201],"(2)":[202],"speaker":[204,252,258],"segmentation":[205,253],"clustering":[207,254],"algorithm":[208,255,282],"task":[216],"improved,":[218],"frame":[221],"feature":[222,302],"extraction":[223],"network":[224,233,295],"integrating":[225],"LSTM":[226],"TDNN":[228],"temporal":[231],"pooling":[232],"dual":[237],"multi-head":[238],"attention":[239],"mechanism":[240],"proposed.":[242],"Compared":[243,318],"DIHARD":[246],"III":[247],"baseline":[248],"network,":[249],"improved":[251],"reduces":[256],"separation":[259],"error":[260],"rate":[261],"(DER)":[262],"by":[263,346],"3.24%,":[264],"3.19%,":[265],"4.53%":[266],"4.14%,":[268],"respectively,":[269],"four":[272],"types":[273],"datasets.":[276],"(3)":[277],"For":[278],"face":[280,293],"detection":[281,294],"interactive":[287],"task,":[290],"single-stage":[292],"FDN":[296,323],"proposed,":[298],"bidirectional":[301],"fusion":[303],"module":[304],"FPN+PANet,":[305],"prediction":[307],"branch":[308],"IoU-":[309],"aware":[310],"loss":[313],"function":[314],"CIoU":[315],"designed.":[317],"RetinaFace,":[320],"final":[322],"has":[324,344],"most":[326],"obvious":[327],"improvement,":[328],"average":[331],"precision":[332],"(Average":[333],"Precision,":[334],"AP)":[335],"verification":[338],"test":[340],"set":[341],"difficult":[342],"targets":[343],"increased":[345],"2.6%":[347],"2.7%,":[349],"respectively.":[350]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
