{"id":"https://openalex.org/W4415538799","doi":"https://doi.org/10.1145/3746027.3755847","title":"BiOMamba: Mamba-based Forward-Then-Backward Temporal Modeling for Online Action Detection and Anticipation","display_name":"BiOMamba: Mamba-based Forward-Then-Backward Temporal Modeling for Online Action Detection and Anticipation","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415538799","doi":"https://doi.org/10.1145/3746027.3755847"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755847","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755847","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004812842","display_name":"Sensen Wang","orcid":"https://orcid.org/0009-0000-4394-5723"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Sensen Wang","raw_affiliation_strings":["Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101438777","display_name":"Yuehu Liu","orcid":"https://orcid.org/0000-0002-1048-5115"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuehu Liu","raw_affiliation_strings":["Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091893246","display_name":"Chi Zhang","orcid":"https://orcid.org/0000-0001-9604-2800"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chi Zhang","raw_affiliation_strings":["Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5004812842"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.32438027,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"787","last_page":"795"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9853000044822693,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6444000005722046},{"id":"https://openalex.org/keywords/anticipation","display_name":"Anticipation (artificial intelligence)","score":0.5654000043869019},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.544700026512146},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5220000147819519},{"id":"https://openalex.org/keywords/observable","display_name":"Observable","score":0.48910000920295715},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.40470001101493835},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.37779998779296875},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.37139999866485596},{"id":"https://openalex.org/keywords/temporal-resolution","display_name":"Temporal resolution","score":0.36399999260902405}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7455999851226807},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6444000005722046},{"id":"https://openalex.org/C176777502","wikidata":"https://www.wikidata.org/wiki/Q4774623","display_name":"Anticipation (artificial intelligence)","level":2,"score":0.5654000043869019},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.544700026512146},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5281999707221985},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5220000147819519},{"id":"https://openalex.org/C32848918","wikidata":"https://www.wikidata.org/wiki/Q845789","display_name":"Observable","level":2,"score":0.48910000920295715},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.40470001101493835},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.37779998779296875},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.37139999866485596},{"id":"https://openalex.org/C119666444","wikidata":"https://www.wikidata.org/wiki/Q5977280","display_name":"Temporal resolution","level":2,"score":0.36399999260902405},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3517000079154968},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.35010001063346863},{"id":"https://openalex.org/C172081034","wikidata":"https://www.wikidata.org/wiki/Q185961","display_name":"Time perception","level":3,"score":0.3246999979019165},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.29109999537467957},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C2777851325","wikidata":"https://www.wikidata.org/wiki/Q7094102","display_name":"Online model","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C2777489503","wikidata":"https://www.wikidata.org/wiki/Q7698936","display_name":"Temporal scales","level":2,"score":0.27090001106262207},{"id":"https://openalex.org/C17289045","wikidata":"https://www.wikidata.org/wiki/Q1138951","display_name":"Cognitive neuroscience","level":3,"score":0.27079999446868896},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.27070000767707825},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2558000087738037},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755847","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755847","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2131774270","https://openalex.org/W2342437993","https://openalex.org/W2507009361","https://openalex.org/W2507425266","https://openalex.org/W2524280059","https://openalex.org/W2605300166","https://openalex.org/W2737102415","https://openalex.org/W2897770249","https://openalex.org/W2954020015","https://openalex.org/W2963524571","https://openalex.org/W2989506443","https://openalex.org/W3035130921","https://openalex.org/W4214589115","https://openalex.org/W4312528573","https://openalex.org/W4312690503","https://openalex.org/W4386608764","https://openalex.org/W4390873483","https://openalex.org/W4390874667","https://openalex.org/W4403780569","https://openalex.org/W4410351277","https://openalex.org/W4412119780","https://openalex.org/W4413147869"],"related_works":[],"abstract_inverted_index":{"Given":[0],"that":[1,105],"action":[2,74,82],"evolution":[3],"follows":[4],"temporal":[5,22,38,130,134,142,147,197,207],"progression,":[6],"recent":[7,186],"studies":[8],"for":[9,72,80,158,213],"Online":[10,15,164],"Action":[11,16],"Detection":[12],"(OAD)":[13],"and":[14,76,115,160,173,184,195,215,230],"Anticipation":[17],"(OAA)":[18],"generally":[19],"adopt":[20],"forward":[21,37,133,146,194],"modeling":[23,39,50,85,131,143,208],"to":[24,46,113,127,139,144,168,205],"capture":[25],"dependencies":[26,198],"in":[27,61,101,176,199],"observable":[28,59,177],"video":[29,64],"sequences.":[30],"However,":[31],"the":[32,48,54,57,62,68,77,90,97,106,117,137,170,200,206],"strictly":[33],"sequential":[34],"nature":[35],"of":[36,119],"prevents":[40],"subsequent":[41],"frames":[42],"from":[43,96],"being":[44],"used":[45],"enhance":[47,145],"earlier":[49],"process.":[51],"In":[52],"particular,":[53],"current":[55,214],"frame,":[56],"last":[58],"frame":[60],"online":[63],"stream,":[65],"serves":[66],"as":[67],"direct":[69],"visual":[70],"cue":[71],"ongoing":[73],"recognition":[75],"informative":[78],"context":[79],"future":[81,216],"anticipation.":[83],"As":[84],"errors":[86],"accumulate":[87],"over":[88],"time,":[89],"resulting":[91],"representations":[92,212],"may":[93],"progressively":[94],"deviate":[95],"actual":[98],"semantics.":[99],"Findings":[100],"cognitive":[102],"neuroscience":[103],"show":[104],"hippocampus":[107],"performs":[108],"backward":[109,129,141,196],"replay":[110],"after":[111],"observation":[112],"reinforce":[114],"correct":[116],"interpretation":[118],"previous":[120],"observations.":[121],"Inspired":[122],"by":[123],"this,":[124],"we":[125,153],"propose":[126,154],"incorporate":[128],"following":[132],"modeling,":[135],"enabling":[136],"model":[138,157,192],"leverage":[140],"modeling.":[148],"Based":[149],"on":[150,222],"this":[151],"idea,":[152],"a":[155],"unified":[156],"OAD":[159],"OAA,":[161],"named":[162],"Bidirectional":[163],"Mamba":[165],"(BiOMamba).":[166],"Specifically,":[167],"address":[169],"excessive":[171],"length":[172],"relevance":[174],"imbalance":[175],"sequences,":[178],"BiOMamba":[179,190,210,218],"compresses":[180],"distant":[181],"long-term":[182],"memory":[183],"preserves":[185],"short-term":[187],"memory.":[188,202],"Then,":[189],"sequentially":[191],"both":[193],"whole":[201],"Finally,":[203],"according":[204],"result,":[209],"generates":[211],"actions.":[217],"achieves":[219],"state-of-the-art":[220],"performance":[221],"THUMOS'14":[223],"(OAD:":[224,232],"73.3%":[225],"mAP,":[226],"OAA:":[227,235],"59.7%":[228],"mAP)":[229],"TVSeries":[231],"89.9%":[233],"mcAP,":[234],"83.7%":[236],"mcAP).":[237]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-25T00:00:00"}
