{"id":"https://openalex.org/W7129207663","doi":"https://doi.org/10.48550/arxiv.2602.13748","title":"RMPL: Relation-aware Multi-task Progressive Learning with Stage-wise Training for Multimedia Event Extraction","display_name":"RMPL: Relation-aware Multi-task Progressive Learning with Stage-wise Training for Multimedia Event Extraction","publication_year":2026,"publication_date":"2026-02-14","ids":{"openalex":"https://openalex.org/W7129207663","doi":"https://doi.org/10.48550/arxiv.2602.13748"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.13748","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.13748","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.13748","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126225890","display_name":"Yongkang Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jin, Yongkang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126262402","display_name":"Jianwen Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Jianwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126260681","display_name":"Jingjing Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jingjing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091165046","display_name":"Jing Yao","orcid":"https://orcid.org/0000-0003-4747-1216"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Jianmin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126181324","display_name":"Yu Hong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong, Yu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5126225890"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9588000178337097,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9588000178337097,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.011900000274181366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.004999999888241291,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.7106000185012817},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.6482999920845032},{"id":"https://openalex.org/keywords/argument","display_name":"Argument (complex analysis)","score":0.554099977016449},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5218999981880188},{"id":"https://openalex.org/keywords/distributional-semantics","display_name":"Distributional semantics","score":0.5123000144958496},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.46810001134872437},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.420199990272522},{"id":"https://openalex.org/keywords/relationship-extraction","display_name":"Relationship extraction","score":0.3919999897480011}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7994999885559082},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.7106000185012817},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.6482999920845032},{"id":"https://openalex.org/C98184364","wikidata":"https://www.wikidata.org/wiki/Q1780131","display_name":"Argument (complex analysis)","level":2,"score":0.554099977016449},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5218999981880188},{"id":"https://openalex.org/C2778828372","wikidata":"https://www.wikidata.org/wiki/Q5283209","display_name":"Distributional semantics","level":3,"score":0.5123000144958496},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49869999289512634},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4900999963283539},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.46810001134872437},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.420199990272522},{"id":"https://openalex.org/C153604712","wikidata":"https://www.wikidata.org/wiki/Q7310755","display_name":"Relationship extraction","level":3,"score":0.3919999897480011},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3659000098705292},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.36010000109672546},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3407000005245209},{"id":"https://openalex.org/C2987896495","wikidata":"https://www.wikidata.org/wiki/Q5416716","display_name":"Event data","level":3,"score":0.3278000056743622},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.32760000228881836},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.30239999294281006},{"id":"https://openalex.org/C2989099063","wikidata":"https://www.wikidata.org/wiki/Q667334","display_name":"Learning design","level":2,"score":0.3005000054836273},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2985000014305115},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27619999647140503},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.27059999108314514},{"id":"https://openalex.org/C2777810175","wikidata":"https://www.wikidata.org/wiki/Q5416730","display_name":"Event structure","level":2,"score":0.2556999921798706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.13748","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.13748","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.13748","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.13748","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimedia":[0],"Event":[1],"Extraction":[2],"(MEE)":[3],"aims":[4],"to":[5,132],"identify":[6],"events":[7],"and":[8,17,81,116,147,154],"their":[9],"arguments":[10],"from":[11,112],"documents":[12],"that":[13],"contain":[14],"both":[15],"text":[16],"images.":[18],"It":[19,139],"requires":[20],"grounding":[21,86],"event":[22,79,114,144],"semantics":[23],"across":[24,137,168],"different":[25,169],"modalities.":[26,138],"Progress":[27],"in":[28,87],"MEE":[29,104],"is":[30,40,125,140],"limited":[31],"by":[32],"the":[33,41,159],"lack":[34],"of":[35],"annotated":[36],"training":[37,56],"data.":[38,156],"M2E2":[39,160],"only":[42,49],"established":[43],"benchmark,":[44],"but":[45],"it":[46],"provides":[47],"annotations":[48],"for":[50,103,143],"evaluation.":[51],"This":[52],"makes":[53],"direct":[54],"supervised":[55],"impractical.":[57],"Existing":[58],"methods":[59],"mainly":[60],"rely":[61],"on":[62,158],"cross-modal":[63],"alignment":[64],"or":[65],"inference-time":[66],"prompting":[67],"with":[68,120,128,162],"Vision--Language":[69],"Models":[70],"(VLMs).":[71],"These":[72],"approaches":[73],"do":[74],"not":[75],"explicitly":[76],"learn":[77,133],"structured":[78],"representations":[80,136],"often":[82],"produce":[83],"weak":[84],"argument":[85,148],"multimodal":[88],"settings.":[89,171],"To":[90],"address":[91],"these":[92],"limitations,":[93],"we":[94],"propose":[95],"RMPL,":[96],"a":[97,129],"Relation-aware":[98],"Multi-task":[99],"Progressive":[100],"Learning":[101],"framework":[102],"under":[105],"low-resource":[106],"conditions.":[107],"RMPL":[108],"incorporates":[109],"heterogeneous":[110],"supervision":[111],"unimodal":[113],"extraction":[115,119,150],"multimedia":[117],"relation":[118],"stage-wise":[121],"training.":[122],"The":[123],"model":[124],"first":[126],"trained":[127],"unified":[130],"schema":[131],"shared":[134],"event-centric":[135],"then":[141],"fine-tuned":[142],"mention":[145],"identification":[146],"role":[149],"using":[151],"mixed":[152],"textual":[153],"visual":[155],"Experiments":[157],"benchmark":[161],"multiple":[163],"VLMs":[164],"show":[165],"consistent":[166],"improvements":[167],"modality":[170]},"counts_by_year":[],"updated_date":"2026-02-18T06:25:47.457606","created_date":"2026-02-18T00:00:00"}
