{"id":"https://openalex.org/W7159659171","doi":"https://doi.org/10.48550/arxiv.2604.27476","title":"EdgeFM: Efficient Edge Inference for Vision-Language Models","display_name":"EdgeFM: Efficient Edge Inference for Vision-Language Models","publication_year":2026,"publication_date":"2026-04-30","ids":{"openalex":"https://openalex.org/W7159659171","doi":"https://doi.org/10.48550/arxiv.2604.27476"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.27476","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27476","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.27476","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059416101","display_name":"Mengling Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Deng, Mengling","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069046107","display_name":"Yuanpeng Chen","orcid":"https://orcid.org/0009-0004-0417-7353"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yuanpeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134997955","display_name":"Sheng Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Sheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134975370","display_name":"Wei Tao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101826555","display_name":"Wenhai Zhang","orcid":"https://orcid.org/0000-0003-0645-3062"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Wenhai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134959868","display_name":"Hui Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Hui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134940227","display_name":"Linyuanhao Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Linyuanhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134963398","display_name":"Kai Zhao","orcid":"https://orcid.org/0000-0003-4583-1855"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Kai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134954355","display_name":"Xiaojun Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Xiaojun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058203407","display_name":"Shanhui Mo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mo, Shanhui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134965125","display_name":"Jingli Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Jingli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134992732","display_name":"Shuang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shuang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134979400","display_name":"Bei Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Bei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134989472","display_name":"Tiankun Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Tiankun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5034574139","display_name":"Xiangjing An","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"An, Xiangjing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":15,"corresponding_author_ids":["https://openalex.org/A5059416101"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.4747999906539917,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.4747999906539917,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.2809000015258789,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.023800000548362732,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7513999938964844},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.6542999744415283},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.6496000289916992},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.545199990272522},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5440999865531921},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5212000012397766},{"id":"https://openalex.org/keywords/x86","display_name":"x86","score":0.47510001063346863},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.45260000228881836}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7513999938964844},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7283999919891357},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.6542999744415283},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.6496000289916992},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.545199990272522},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5440999865531921},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5212000012397766},{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.47510001063346863},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.46160000562667847},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.45260000228881836},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.391400009393692},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.36399999260902405},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.36149999499320984},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.3564000129699707},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.3506999909877777},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.3328999876976013},{"id":"https://openalex.org/C177225278","wikidata":"https://www.wikidata.org/wiki/Q192674","display_name":"Factoring","level":2,"score":0.3294999897480011},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.3102000057697296},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.30869999527931213},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2955000102519989},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.2888000011444092},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.259799987077713},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.27476","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27476","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.27476","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.27476","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-language":[0],"models":[1],"(VLMs)":[2],"have":[3],"demonstrated":[4],"strong":[5],"applicability":[6],"in":[7],"edge":[8,88,205],"industrial":[9,87,206],"applications,":[10],"yet":[11],"their":[12],"deployment":[13,152],"remains":[14],"severely":[15],"constrained":[16],"by":[17,131],"requirements":[18],"for":[19,71,85,120,203],"deterministic":[20],"low":[21],"latency":[22],"and":[23,50,62,98,142,146],"stable":[24],"execution":[25],"under":[26],"resource":[27],"limitations.":[28],"Existing":[29],"frameworks":[30],"either":[31],"rely":[32],"on":[33,153,183],"bloated":[34],"general-purpose":[35],"designs":[36],"or":[37],"force":[38],"developers":[39],"into":[40],"opaque,":[41],"hardware-specific":[42],"closed-source":[43,121],"ecosystems,":[44],"leading":[45],"to":[46,65,94,177],"hardware":[47],"lock-in":[48],"limitation":[49],"poor":[51],"cross-platform":[52,86,160],"adaptability.":[53],"Observing":[54],"that":[55,191],"modern":[56],"AI":[57],"agents":[58],"can":[59],"efficiently":[60],"search":[61],"tune":[63],"configurations":[64],"generate":[66],"highly":[67],"optimized":[68],"low-level":[69],"kernels":[70],"standard":[72],"LLM":[73],"operators,":[74],"we":[75],"propose":[76],"EdgeFM,":[77],"a":[78,104],"lightweight,":[79],"agent-driven":[80],"VLM/LLM":[81],"inference":[82,169,196],"framework":[83,135],"tailored":[84],"deployment.":[89],"EdgeFM":[90,192],"removes":[91],"non-essential":[92],"features":[93],"reduce":[95],"single-request":[96],"latency,":[97],"encapsulates":[99],"agent-tuned":[100],"kernel":[101],"optimizations":[102],"as":[103],"modular":[105],"library":[106],"of":[107,114],"reusable":[108],"skills.":[109],"By":[110],"allowing":[111],"direct":[112],"invocation":[113],"these":[115],"skills":[116],"rather":[117],"than":[118,171],"waiting":[119],"implementations,":[122],"it":[123,165],"effectively":[124],"closes":[125],"the":[126,148,154,184],"performance":[127,170],"gap":[128],"long":[129],"dominated":[130],"proprietary":[132],"toolchains.":[133],"The":[134],"natively":[136],"supports":[137],"mainstream":[138],"platforms":[139],"including":[140],"x86":[141],"NVIDIA":[143,185],"Orin":[144,186],"SoCs,":[145],"represents":[147],"first":[149],"end-to-end":[150,195],"VLA":[151],"domestic":[155],"Horizon":[156],"Journey":[157],"platform,":[158],"enhancing":[159],"portability.":[161],"In":[162],"most":[163],"cases,":[164],"yields":[166],"clearly":[167],"better":[168],"conventional":[172],"vendor-specific":[173],"toolchains,":[174],"achieving":[175],"up":[176],"1.49":[178],"times":[179],"speedup":[180],"over":[181],"TensorRT-Edge-LLM":[182],"platform.":[187],"Experimental":[188],"results":[189],"show":[190],"delivers":[193],"favorable":[194],"performance,":[197],"providing":[198],"an":[199],"open-source,":[200],"production-grade":[201],"solution":[202],"diverse":[204],"scenarios.":[207]},"counts_by_year":[],"updated_date":"2026-05-02T06:10:54.344120","created_date":"2026-05-02T00:00:00"}
