{"id":"https://openalex.org/W4415538354","doi":"https://doi.org/10.1145/3746027.3755363","title":"Mavors: Multi-granularity Video Representation for Multimodal Large Language Model","display_name":"Mavors: Multi-granularity Video Representation for Multimodal Large Language Model","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415538354","doi":"https://doi.org/10.1145/3746027.3755363"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755363","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755363","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yang Shi","orcid":"https://orcid.org/0009-0003-9241-236X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yang Shi","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-9241-236X","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032858379","display_name":"Jiaheng Liu","orcid":"https://orcid.org/0000-0002-5183-8538"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]},{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaheng Liu","raw_affiliation_strings":["Nanjing University, Suzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-5183-8538","affiliations":[{"raw_affiliation_string":"Nanjing University, Suzhou, China","institution_ids":["https://openalex.org/I3923682","https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065487736","display_name":"Yushuo Guan","orcid":"https://orcid.org/0000-0001-5258-2397"},"institutions":[{"id":"https://openalex.org/I4210155967","display_name":"OriginWater (China)","ror":"https://ror.org/04h7gmn81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210155967"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yushuo Guan","raw_affiliation_strings":["Kling Team, Kuaishou Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5258-2397","affiliations":[{"raw_affiliation_string":"Kling Team, Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4210155967"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhenhua Wu","orcid":"https://orcid.org/0009-0007-4313-2192"},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenhua Wu","raw_affiliation_strings":["Kuaishou Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0007-4313-2192","affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020022791","display_name":"Yuanxing Zhang","orcid":"https://orcid.org/0000-0003-1460-8124"},"institutions":[{"id":"https://openalex.org/I4210155967","display_name":"OriginWater (China)","ror":"https://ror.org/04h7gmn81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210155967"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanxing Zhang","raw_affiliation_strings":["Kling Team, Kuaishou Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-1460-8124","affiliations":[{"raw_affiliation_string":"Kling Team, Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4210155967"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023053922","display_name":"Zihao Wang","orcid":"https://orcid.org/0000-0003-1869-113X"},"institutions":[{"id":"https://openalex.org/I4210155967","display_name":"OriginWater (China)","ror":"https://ror.org/04h7gmn81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210155967"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zihao Wang","raw_affiliation_strings":["Kling Team, Kuaishou Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-1869-113X","affiliations":[{"raw_affiliation_string":"Kling Team, Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4210155967"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Weihong Lin","orcid":"https://orcid.org/0000-0003-2440-6585"},"institutions":[{"id":"https://openalex.org/I4210155967","display_name":"OriginWater (China)","ror":"https://ror.org/04h7gmn81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210155967"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihong Lin","raw_affiliation_strings":["Kling Team, Kuaishou Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-2440-6585","affiliations":[{"raw_affiliation_string":"Kling Team, Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4210155967"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076789825","display_name":"Jingyun Hua","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingyun Hua","raw_affiliation_strings":["Kuaishou Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-6468-6697","affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zekun Wang","orcid":"https://orcid.org/0009-0007-9128-4254"},"institutions":[{"id":"https://openalex.org/I4210155967","display_name":"OriginWater (China)","ror":"https://ror.org/04h7gmn81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210155967"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zekun Wang","raw_affiliation_strings":["Kling Team, Kuaishou Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0007-9128-4254","affiliations":[{"raw_affiliation_string":"Kling Team, Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4210155967"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xinlong Chen","orcid":"https://orcid.org/0009-0009-7146-9782"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinlong Chen","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-7146-9782","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101788611","display_name":"Bohan Zeng","orcid":"https://orcid.org/0009-0009-0999-6231"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bohan Zeng","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-0999-6231","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008772211","display_name":"Wentao Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wentao Zhang","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-7532-5550","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101586840","display_name":"Fuzheng Zhang","orcid":"https://orcid.org/0000-0002-6079-6392"},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fuzheng Zhang","raw_affiliation_strings":["Kuaishou Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-6079-6392","affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016603628","display_name":"Wenjing Yang","orcid":"https://orcid.org/0000-0002-6997-0406"},"institutions":[{"id":"https://openalex.org/I198357462","display_name":"Changsha University","ror":"https://ror.org/011d8sm39","country_code":"CN","type":"education","lineage":["https://openalex.org/I198357462"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjing Yang","raw_affiliation_strings":["Researcher, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0002-6997-0406","affiliations":[{"raw_affiliation_string":"Researcher, Changsha, China","institution_ids":["https://openalex.org/I198357462"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102941781","display_name":"Di Zhang","orcid":"https://orcid.org/0009-0006-5475-2728"},"institutions":[{"id":"https://openalex.org/I4210155967","display_name":"OriginWater (China)","ror":"https://ror.org/04h7gmn81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210155967"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Di Zhang","raw_affiliation_strings":["Kling Team, Kuaishou Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-5475-2728","affiliations":[{"raw_affiliation_string":"Kling Team, Kuaishou Technology, Beijing, China","institution_ids":["https://openalex.org/I4210155967"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":15,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":1.1332,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.83532743,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"10994","last_page":"11003"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4794999957084656},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4767000079154968},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.45840001106262207},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.39160001277923584},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.38420000672340393},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.37630000710487366},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.3698999881744385},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.3375000059604645},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.33160001039505005},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.3294999897480011}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8282999992370605},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6672999858856201},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.527400016784668},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4794999957084656},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4767000079154968},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.45840001106262207},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.39160001277923584},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.38420000672340393},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.37630000710487366},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3698999881744385},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3375000059604645},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.33160001039505005},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.3294999897480011},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.328000009059906},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3100999891757965},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.30790001153945923},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.3077000081539154},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.29330000281333923},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.2840000092983246},{"id":"https://openalex.org/C2985909886","wikidata":"https://www.wikidata.org/wiki/Q193147","display_name":"Spatial coherence","level":3,"score":0.2831999957561493},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.27639999985694885},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.2676999866962433},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.2605000138282776},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.257999986410141},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C128840427","wikidata":"https://www.wikidata.org/wiki/Q1302174","display_name":"Motion compensation","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.25099998712539673},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755363","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755363","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2307512708","https://openalex.org/W4387321091","https://openalex.org/W4390873312","https://openalex.org/W4402716477","https://openalex.org/W4402727142"],"related_works":[],"abstract_inverted_index":{"Long-context":[0],"video":[1,70,81,135],"understanding":[2,136],"in":[3,41,50,153,165],"Multimodal":[4],"Large":[5],"Language":[6],"Models":[7],"(MLLMs)":[8],"faces":[9],"a":[10,64],"critical":[11],"challenge:":[12],"balancing":[13],"computational":[14],"efficiency":[15],"with":[16,30,52,124],"the":[17,130],"retention":[18],"of":[19],"fine-grained":[20,168],"spatio-temporal":[21,169],"patterns.":[22],"Existing":[23],"approaches":[24],"(e.g.,":[25],"sparse":[26],"sampling,":[27],"dense":[28],"sampling":[29],"low":[31],"resolution,":[32],"and":[33,104,107,134,158],"token":[34],"compression)":[35],"suffer":[36],"from":[37],"significant":[38],"information":[39],"loss":[40],"temporal":[42,116,159],"dynamics,":[43],"spatial":[44,99,156],"details,":[45],"or":[46,55],"subtle":[47],"interactions,":[48],"particularly":[49],"videos":[51,142],"complex":[53],"motion":[54],"varying":[56],"resolutions.":[57],"To":[58],"address":[59],"this,":[60],"we":[61],"propose":[62],"Mavors,":[63],"novel":[65],"framework":[66,131],"that":[67,96,114],"introduces":[68],"Multi-granularity":[69],"representation":[71],"for":[72],"holistic":[73],"long-video":[74],"modeling.":[75],"Specifically,":[76],"Mavors":[77],"directly":[78],"encodes":[79],"raw":[80],"content":[82],"into":[83],"latent":[84],"representations":[85],"through":[86],"two":[87],"core":[88],"components:":[89],"1)":[90],"an":[91,109],"Intra-chunk":[92],"Vision":[93,105],"Encoder":[94],"(IVE)":[95],"preserves":[97],"high-resolution":[98],"features":[100],"via":[101,143],"3D":[102],"convolutions":[103],"Transformers,":[106],"2)":[108],"Inter-chunk":[110],"Feature":[111],"Aggregator":[112],"(IFA)":[113],"establishes":[115],"coherence":[117],"across":[118,147],"chunks":[119],"using":[120],"transformer-based":[121],"dependency":[122],"modeling":[123],"chunk-level":[125],"rotary":[126],"position":[127],"encodings.":[128],"Moreover,":[129],"unifies":[132],"image":[133],"by":[137],"treating":[138],"images":[139],"as":[140],"single-frame":[141],"sub-image":[144],"decomposition.":[145],"Experiments":[146],"diverse":[148],"benchmarks":[149],"demonstrate":[150],"Mavors'":[151],"superiority":[152],"maintaining":[154],"both":[155],"fidelity":[157],"continuity,":[160],"significantly":[161],"outperforming":[162],"existing":[163],"methods":[164],"tasks":[166],"requiring":[167],"reasoning.":[170]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-25T00:00:00"}
