{"id":"https://openalex.org/W7152107554","doi":"https://doi.org/10.48550/arxiv.2604.05375","title":"DAT: Dual-Aware Adaptive Transmission for Efficient Multimodal LLM Inference in Edge-Cloud Systems","display_name":"DAT: Dual-Aware Adaptive Transmission for Efficient Multimodal LLM Inference in Edge-Cloud Systems","publication_year":2026,"publication_date":"2026-04-07","ids":{"openalex":"https://openalex.org/W7152107554","doi":"https://doi.org/10.48550/arxiv.2604.05375"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.05375","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05375","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.05375","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133175037","display_name":"Qi Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Guo, Qi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133190598","display_name":"Zheming Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Zheming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122594339","display_name":"Yunqing Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Yunqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133165089","display_name":"Chang Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Chang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133155463","display_name":"Wen Ji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Wen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5133175037"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6061000227928162,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6061000227928162,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.17900000512599945,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.02280000038444996,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6588000059127808},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5838000178337097},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4950999915599823},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.45100000500679016},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4138000011444092},{"id":"https://openalex.org/keywords/transmission","display_name":"Transmission (telecommunications)","score":0.34860000014305115},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.32690000534057617}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8399999737739563},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6588000059127808},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5838000178337097},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5188000202178955},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4950999915599823},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.45100000500679016},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4138000011444092},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.350600004196167},{"id":"https://openalex.org/C761482","wikidata":"https://www.wikidata.org/wiki/Q118093","display_name":"Transmission (telecommunications)","level":2,"score":0.34860000014305115},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.321399986743927},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.32010000944137573},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.302700012922287},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2989000082015991},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2969000041484833},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.28519999980926514},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C105842133","wikidata":"https://www.wikidata.org/wiki/Q1899679","display_name":"Visual communication","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.2554999887943268}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.05375","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05375","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.05375","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05375","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"large":[1],"language":[2],"models":[3],"(MLLMs)":[4],"have":[5],"shown":[6],"strong":[7],"capability":[8],"in":[9,22],"semantic":[10,51,112,127,168],"understanding":[11],"and":[12,29,32,36,56,88,111,121,129,142,158,175,192],"visual":[13,38,58,109,131,179],"reasoning,":[14],"yet":[15],"their":[16],"use":[17],"on":[18,100],"continuous":[19],"video":[20],"streams":[21],"bandwidth-constrained":[23],"edge-cloud":[24],"systems":[25],"incurs":[26],"prohibitive":[27],"computation":[28],"communication":[30],"overhead":[31],"hinders":[33],"low-latency":[34,53,126],"alerting":[35,128],"effective":[37,57,130],"evidence":[39,59,132,180],"delivery.":[40],"To":[41,61,124],"address":[42],"this":[43],"challenge,":[44],"we":[45,67,102,137],"propose":[46,68],"DAT":[47,153],"to":[48,84,173],"achieve":[49],"high-quality":[50],"generation,":[52],"event":[54,117],"alerting,":[55],"supplementation.":[60],"reduce":[62],"unnecessary":[63],"deep":[64],"reasoning":[65],"costs,":[66],"a":[69,81,140],"collaborative":[70],"small-large":[71],"model":[72,78],"cascade.":[73],"A":[74],"lightweight":[75],"edge-side":[76],"small":[77],"acts":[79],"as":[80],"gating":[82],"module":[83],"filter":[85],"non-target-event":[86],"frames":[87],"perform":[89],"object":[90,119],"detection,":[91,120],"triggering":[92],"MLLM":[93],"inference":[94,191],"only":[95],"for":[96],"suspicious":[97],"frames.":[98],"Building":[99],"this,":[101],"introduce":[103],"an":[104],"efficient":[105],"fine-tuning":[106],"strategy":[107],"with":[108],"guidance":[110],"prompting,":[113],"which":[114],"improves":[115],"structured":[116],"understanding,":[118],"output":[122,160],"consistency.":[123,161],"ensure":[125],"supplementation":[133],"under":[134],"bandwidth":[135],"constraints,":[136],"further":[138],"devise":[139],"semantics":[141],"bandwidth-aware":[143],"multi-stream":[144],"adaptive":[145],"transmission":[146],"optimization":[147],"method.":[148],"Experimental":[149],"results":[150],"show":[151],"that":[152],"achieves":[154],"98.83%":[155],"recognition":[156],"accuracy":[157],"100%":[159],"Under":[162],"severe":[163],"congestion,":[164],"it":[165],"reduces":[166],"weighted":[167],"alert":[169],"delay":[170],"by":[171],"up":[172],"77.5%":[174],"delivers":[176],"98.33%":[177],"of":[178,187],"within":[181],"0.5":[182],"s,":[183],"demonstrating":[184],"the":[185],"effectiveness":[186],"jointly":[188],"optimizing":[189],"cascade":[190],"elastic":[193],"transmission.":[194]},"counts_by_year":[],"updated_date":"2026-04-09T06:13:59.934233","created_date":"2026-04-09T00:00:00"}
