{"id":"https://openalex.org/W6941175250","doi":"https://doi.org/10.13016/lgk3-0lv2","title":"DEVELOPING MULTIMODAL LEARNING METHODS FOR VIDEO UNDERSTANDING","display_name":"DEVELOPING MULTIMODAL LEARNING METHODS FOR VIDEO UNDERSTANDING","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W6941175250","doi":"https://doi.org/10.13016/lgk3-0lv2"},"language":"en","primary_location":{"id":"pmh:oai:drum.lib.umd.edu:1903/33411","is_oa":false,"landing_page_url":"http://hdl.handle.net/1903/33411","pdf_url":null,"source":{"id":"https://openalex.org/S4306401518","display_name":"University Libraries (University of Maryland)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I66946132","host_organization_name":"University of Maryland, College Park","host_organization_lineage":["https://openalex.org/I66946132"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Dissertation"},"type":"other","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.13016/lgk3-0lv2","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Sun, Mingwei","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sun, Mingwei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10451","display_name":"Mycorrhizal Fungi and Plant Interactions","score":0.5192000269889832,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10451","display_name":"Mycorrhizal Fungi and Plant Interactions","score":0.5192000269889832,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.17110000550746918,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10825","display_name":"Plant Pathogens and Fungal Diseases","score":0.03150000050663948,"subfield":{"id":"https://openalex.org/subfields/1307","display_name":"Cell Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.47450000047683716},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4465999901294708},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.42410001158714294},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.3898000121116638},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.3785000145435333},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.3741999864578247},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.35910001397132874},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.3359000086784363}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7648000121116638},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.47450000047683716},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.4553999900817871},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4465999901294708},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.42410001158714294},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40799999237060547},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3898000121116638},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3785000145435333},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3741999864578247},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3691999912261963},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.35910001397132874},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3359000086784363},{"id":"https://openalex.org/C2778598663","wikidata":"https://www.wikidata.org/wiki/Q1407599","display_name":"Video content analysis","level":4,"score":0.33230000734329224},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3199000060558319},{"id":"https://openalex.org/C2776566319","wikidata":"https://www.wikidata.org/wiki/Q3495514","display_name":"Interactive video","level":2,"score":0.3188000023365021},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.31299999356269836},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.3012000024318695},{"id":"https://openalex.org/C22561748","wikidata":"https://www.wikidata.org/wiki/Q854954","display_name":"Videoconferencing","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2833999991416931},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.263700008392334}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:drum.lib.umd.edu:1903/33411","is_oa":false,"landing_page_url":"http://hdl.handle.net/1903/33411","pdf_url":null,"source":{"id":"https://openalex.org/S4306401518","display_name":"University Libraries (University of Maryland)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I66946132","host_organization_name":"University of Maryland, College Park","host_organization_lineage":["https://openalex.org/I66946132"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Dissertation"},{"id":"doi:10.13016/lgk3-0lv2","is_oa":true,"landing_page_url":"https://doi.org/10.13016/lgk3-0lv2","pdf_url":null,"source":{"id":"https://openalex.org/S4306402644","display_name":"Digital Repository at the University of Maryland (University of Maryland College Park)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I66946132","host_organization_name":"University of Maryland, College Park","host_organization_lineage":["https://openalex.org/I66946132"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"thesis"}],"best_oa_location":{"id":"doi:10.13016/lgk3-0lv2","is_oa":true,"landing_page_url":"https://doi.org/10.13016/lgk3-0lv2","pdf_url":null,"source":{"id":"https://openalex.org/S4306402644","display_name":"Digital Repository at the University of Maryland (University of Maryland College Park)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I66946132","host_organization_name":"University of Maryland, College Park","host_organization_lineage":["https://openalex.org/I66946132"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"thesis"},"sustainable_development_goals":[{"score":0.8172367215156555,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0,304,369,449,524],"recent":[1],"years,":[2],"the":[3,47,68,78,101,114,143,193,215,244,255,258,275,280,286,314,334,345,352,361,364,370,379,385,416,436,453,456,468,477,489,509,513,522,525,562,565,579,596,605,610,621,624,642,668,671,682,688,692,696,704],"field":[4,705],"of":[5,52,57,72,80,97,103,120,130,145,150,160,174,217,228,319,333,347,363,387,415,455,483,491,498,512,545,564,600,616,623,659,667,670,684,695],"deep":[6,156],"learning,":[7,15],"with":[8,127,208,404],"a":[9,50,95,172,209,249,299,375,426,462,480,503,530,542,570,585,613,629,657,664,708],"particular":[10],"emphasis":[11],"on":[12,202,298,327,390,425,716],"multimodal":[13,533,617],"representation":[14,544],"has":[16,45],"experienced":[17],"significant":[18,190],"advancements.":[19],"These":[20,110,139,186],"advancements":[21],"are":[22,235],"largely":[23],"attributable":[24],"to":[25,84,142,154,170,237,252,273,312,343,351,407,451,466,556,575,594,603,662,680,687,703,712],"groundbreaking":[26],"progress":[27,44],"in":[28,59,94,220,279,294,442,495,521,540,649],"areas":[29],"such":[30,133],"as":[31,122,124,134,502],"computer":[32],"vision,":[33],"voice":[34],"recognition,":[35],"natural":[36],"language":[37],"processing,":[38],"and":[39,77,86,117,136,183,231,240,260,321,324,366,400,410,519,548,602],"graph":[40],"network":[41,137,200,269,591],"learning.":[42,618],"This":[43,355,507,590,700],"paved":[46],"way":[48],"for":[49,75,181,214,384,516],"multitude":[51],"new":[53],"applications.":[54,99],"The":[55,82,148,282,393,413,432,473],"domain":[56],"video,":[58,121],"particular,":[60],"holds":[61],"immense":[62],"potential.":[63],"Video":[64],"is":[65,153,224,270,337,419,592],"often":[66],"considered":[67],"most":[69],"potent":[70],"form":[71],"digital":[73],"content":[74,90,106,184],"communication":[76],"dissemination":[79],"information.":[81],"ability":[83],"effectively":[85,253],"efficiently":[87],"comprehend":[88],"video":[89,105,146,162,223,328,391,651,714,718],"could":[91,177,188],"prove":[92],"instrumental":[93],"variety":[96],"downstream":[98],"However,":[100],"task":[102],"understanding":[104,161,482],"presents":[107],"numerous":[108],"challenges.":[109],"challenges":[111],"stem":[112],"from":[113,476,578,632],"inherently":[115],"unstructured":[116,131],"complex":[118],"nature":[119],"well":[123],"its":[125,338,367],"interactions":[126,256],"other":[128],"forms":[129],"data,":[132],"text":[135,401],"data.":[138,392],"factors":[140],"contribute":[141],"difficulty":[144],"analysis.":[147],"objective":[149],"this":[151],"dissertation":[152],"develop":[155],"learning":[157,534,541],"methodologies":[158,168],"capable":[159],"across":[163,470],"multiple":[164],"dimensions.":[165],"Furthermore,":[166,486],"these":[167],"aim":[169],"offer":[171,479],"degree":[173],"interpretability,":[175,339],"which":[176,340,538],"yield":[178],"valuable":[179,358,481],"insights":[180,187,359,474],"researchers":[182,518],"creators.":[185],"have":[189,264,655],"managerial":[191],"implications.In":[192],"first":[194],"study,":[195,372,527],"I":[196,247,306,373,459,487,528,552,568,583,627,654,674],"introduce":[197,374],"an":[198,308,677],"innovative":[199],"based":[201,389],"Long":[203],"Short-Term":[204],"Memory":[205],"(LSTM),":[206],"enhanced":[207],"Transformer":[210,251,588],"co-attention":[211,250,698],"mechanism,":[212],"designed":[213,383,574],"prediction":[216,386],"apparent":[218,296],"emotion":[219,297],"videos.":[221,637],"Each":[222],"segmented":[225],"into":[226,360],"clips":[227],"one-second":[229],"duration,":[230],"pre-trained":[232],"ResNet":[233],"networks":[234],"employed":[236],"extract":[238,576],"audio":[239,259,322],"visual":[241,261,320],"features":[242,262],"at":[243],"second":[245,371],"level.":[246],"construct":[248],"capture":[254],"between":[257,316],"that":[263,435,607,641],"been":[265],"extracted.":[266],"An":[267],"LSTM":[268],"then":[271],"utilized":[272],"learn":[274,604],"spatiotemporal":[276],"information":[277,577,601],"inherent":[278],"video.":[281],"proposed":[283,335,394,417,437,457,514,625,643,672,697],"model,":[284,458,535,626],"termed":[285,536],"Sec2Sec":[287],"Co-attention":[288,381,587],"Transformer,":[289,382],"outperforms":[290],"several":[291],"state-of-the-art":[292,440,554,647],"methods":[293,441],"predicting":[295,496,650,713],"widely":[300],"recognized":[301],"dataset:":[302],"LIRIS-ACCEDE.":[303],"addition,":[305],"conduct":[307],"extensive":[309],"data":[310],"analysis":[311,465,679],"explore":[313],"relationships":[315],"various":[317],"dimensions":[318],"components":[323],"their":[325,405],"influence":[326],"predictions.":[329,368,412,485],"A":[330],"notable":[331],"feature":[332],"model":[336,365,438,557,644,689],"enables":[341],"us":[342],"study":[344,681],"contributions":[346,683],"different":[348,471],"time":[349],"points":[350],"overall":[353],"prediction.":[354],"interpretability":[356,464,678],"provides":[357],"functioning":[362],"novel":[376,531],"neural":[377],"network,":[378],"Multimodal":[380],"personality":[388,484,493],"methodology":[395],"concurrently":[396],"models":[397,648],"audio,":[398],"visual,":[399],"representations,":[402],"along":[403],"intra-relationships,":[406],"achieve":[408],"precise":[409],"efficient":[411],"effectiveness":[414,622],"approach":[418,515,711],"demonstrated":[420],"through":[421],"comprehensive":[422,710],"experiments":[423,639],"conducted":[424,656],"real-world":[427,630],"dataset,":[428],"namely,":[429],"First":[430],"Impressions.":[431],"results":[433],"indicate":[434],"surpasses":[439,645],"performance":[443,454],"while":[444],"preserving":[445],"high":[446],"computational":[447],"efficiency.":[448],"addition":[450],"evaluating":[452],"also":[460,675],"undertake":[461],"thorough":[463],"examine":[467],"contribution":[469],"levels.":[472],"gained":[475],"findings":[478],"illustrate":[488],"practicality":[490],"video-based":[492],"detection":[494],"outcomes":[497],"MBA":[499],"admissions,":[500],"serving":[501],"decision":[504],"support":[505],"system.":[506],"highlights":[508],"potential":[510],"importance":[511],"both":[517],"practitioners":[520],"field.":[523],"third":[526],"present":[529],"generalized":[532,586],"VAN,":[537],"excels":[539],"unified":[543],"\\textbf{v}isual,":[546],"\\textbf{a}coustic,":[547],"\\textbf{n}etwork":[549],"cues.":[550],"Initially,":[551],"utilize":[553],"encoders":[555],"each":[558,685],"modality.":[559],"To":[560,619],"augment":[561],"efficiency":[563],"training":[566],"process,":[567],"adopt":[569],"pre-training":[571],"strategy":[572],"specifically":[573],"music":[580],"network.":[581,589],"Subsequently,":[582],"propose":[584],"engineered":[593],"amalgamate":[595],"three":[597,611],"distinct":[598],"types":[599],"intra-relationships":[606],"exist":[608],"among":[609],"modalities,":[612],"critical":[614],"facet":[615],"assess":[620],"collect":[628],"dataset":[631],"TikTok,":[633],"comprising":[634],"over":[635],"88,000":[636],"Extensive":[638],"demonstrate":[640],"existing":[646],"popularity.":[652],"Moreover,":[653],"series":[658],"ablation":[660],"studies":[661],"attain":[663],"deeper":[665],"comprehension":[666],"behavior":[669],"model.":[673],"perform":[676],"modality":[686],"performance,":[690],"leveraging":[691],"unique":[693],"property":[694],"structure.":[699],"research":[701],"contributes":[702],"by":[706],"proffering":[707],"more":[709],"popularity":[715],"short-form":[717],"platforms.":[719]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
