{"id":"https://openalex.org/W4414587226","doi":"https://doi.org/10.48550/arxiv.2505.19812","title":"Efficient Multi-modal Long Context Learning for Training-free Adaptation","display_name":"Efficient Multi-modal Long Context Learning for Training-free Adaptation","publication_year":2025,"publication_date":"2025-05-26","ids":{"openalex":"https://openalex.org/W4414587226","doi":"https://doi.org/10.48550/arxiv.2505.19812"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2505.19812","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.19812","pdf_url":"https://arxiv.org/pdf/2505.19812","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2505.19812","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023181336","display_name":"Zehong Ma","orcid":"https://orcid.org/0009-0005-1533-2651"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ma, Zehong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055433405","display_name":"Shiliang Zhang","orcid":"https://orcid.org/0000-0001-9053-9314"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shiliang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050660610","display_name":"Longhui Wei","orcid":"https://orcid.org/0000-0001-6916-3009"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Longhui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100393506","display_name":"Qi Tian","orcid":"https://orcid.org/0000-0002-7252-5047"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Qi","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5023181336"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9319999814033508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9319999814033508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9053000211715698,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2505.19812","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.19812","pdf_url":"https://arxiv.org/pdf/2505.19812","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2505.19812","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2505.19812","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2505.19812","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.19812","pdf_url":"https://arxiv.org/pdf/2505.19812","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320318398","display_name":"Ant Group","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320324787","display_name":"Peking University","ror":"https://ror.org/02v51f717"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4414587226.pdf","grobid_xml":"https://content.openalex.org/works/W4414587226.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Traditional":[0],"approaches":[1],"to":[2,9,111,147],"adapting":[3],"multi-modal":[4,119,168],"large":[5],"language":[6],"models":[7,169],"(MLLMs)":[8],"new":[10],"tasks":[11],"have":[12],"relied":[13],"heavily":[14],"on":[15,133,142],"fine-tuning.":[16],"This":[17,106],"paper":[18],"introduces":[19],"Efficient":[20],"Multi-Modal":[21],"Long":[22],"Context":[23],"Learning":[24],"(EMLoC),":[25],"a":[26,41,63,90,97,123,159],"novel":[27],"training-free":[28],"alternative":[29],"that":[30,138],"embeds":[31],"demonstration":[32],"examples":[33],"directly":[34],"into":[35,77],"the":[36,109,154],"model":[37],"input.":[38],"EMLoC":[39,61,139,157],"offers":[40],"more":[42],"efficient,":[43],"flexible,":[44],"and":[45,58,115,125,164],"scalable":[46,124],"solution":[47,127],"for":[48,118,128,162],"task":[49],"adaptation.":[50],"Because":[51],"extremely":[52],"lengthy":[53],"inputs":[54,76],"introduce":[55],"prohibitive":[56],"computational":[57],"memory":[59,80],"overhead,":[60],"contributes":[62],"chunk-wise":[64],"compression":[65,114],"mechanism":[66],"combined":[67],"with":[68,144],"layer-wise":[69],"adaptive":[70],"pruning.":[71],"It":[72],"condenses":[73],"long-context":[74,120,149],"multimodal":[75],"compact,":[78],"task-specific":[79],"representations.":[81],"By":[82],"adaptively":[83],"pruning":[84,116],"tokens":[85],"at":[86,177],"each":[87],"layer":[88],"under":[89],"Jensen-Shannon":[91],"divergence":[92],"constraint,":[93],"our":[94],"method":[95],"achieves":[96,140],"dramatic":[98],"reduction":[99],"in":[100,170],"inference":[101],"complexity":[102],"without":[103],"sacrificing":[104],"performance.":[105],"approach":[107],"is":[108],"first":[110],"seamlessly":[112],"integrate":[113],"techniques":[117],"learning,":[121],"offering":[122],"efficient":[126,163],"real-world":[129],"applications.":[130],"Extensive":[131],"experiments":[132],"diverse":[134],"vision-language":[135],"benchmarks":[136],"demonstrate":[137],"performance":[141],"par":[143],"or":[145],"superior":[146],"naive":[148],"approaches.":[150],"Our":[151],"results":[152],"highlight":[153],"potential":[155],"of":[156,167],"as":[158],"groundbreaking":[160],"framework":[161],"flexible":[165],"adaptation":[166],"resource-constrained":[171],"environments.":[172],"Codes":[173],"are":[174],"publicly":[175],"available":[176],"https://github.com/Zehong-Ma/EMLoC.":[178]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
