{"id":"https://openalex.org/W7156137066","doi":"https://doi.org/10.48550/arxiv.2604.22260","title":"Towards Safe Mobility: A Unified Transportation Foundation Model enabled by Open-Ended Vision-Language Dataset","display_name":"Towards Safe Mobility: A Unified Transportation Foundation Model enabled by Open-Ended Vision-Language Dataset","publication_year":2026,"publication_date":"2026-04-24","ids":{"openalex":"https://openalex.org/W7156137066","doi":"https://doi.org/10.48550/arxiv.2604.22260"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.22260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.22260","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134689036","display_name":"Wenhui Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Wenhui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104805610","display_name":"Songyan Zhang","orcid":"https://orcid.org/0009-0006-2853-8875"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Songyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134674111","display_name":"Collister Chua","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chua, Collister","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134716582","display_name":"Yang Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Yang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102734535","display_name":"Zhiqi Mao","orcid":"https://orcid.org/0009-0003-9432-8655"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mao, Zhiqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134746637","display_name":"Heng Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Heng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134667760","display_name":"Chen Lv","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lv, Chen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7713000178337097,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7713000178337097,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.09130000323057175,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.05869999900460243,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.5835999846458435},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5162000060081482},{"id":"https://openalex.org/keywords/intelligent-transportation-system","display_name":"Intelligent transportation system","score":0.4178999960422516},{"id":"https://openalex.org/keywords/pedestrian","display_name":"Pedestrian","score":0.36579999327659607},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.3531000018119812},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.3386000096797943},{"id":"https://openalex.org/keywords/traffic-congestion","display_name":"Traffic congestion","score":0.3248000144958496}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6557999849319458},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.5835999846458435},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5162000060081482},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4440999925136566},{"id":"https://openalex.org/C47796450","wikidata":"https://www.wikidata.org/wiki/Q508378","display_name":"Intelligent transportation system","level":2,"score":0.4178999960422516},{"id":"https://openalex.org/C2777113093","wikidata":"https://www.wikidata.org/wiki/Q221488","display_name":"Pedestrian","level":2,"score":0.36579999327659607},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.3531000018119812},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3386000096797943},{"id":"https://openalex.org/C2779888511","wikidata":"https://www.wikidata.org/wiki/Q244156","display_name":"Traffic congestion","level":2,"score":0.3248000144958496},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.30399999022483826},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.27880001068115234},{"id":"https://openalex.org/C22212356","wikidata":"https://www.wikidata.org/wiki/Q775325","display_name":"Transport engineering","level":1,"score":0.2766999900341034},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.274399995803833},{"id":"https://openalex.org/C2989357034","wikidata":"https://www.wikidata.org/wiki/Q7590","display_name":"Transportation infrastructure","level":2,"score":0.27300000190734863},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26989999413490295},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.26010000705718994},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.2596000134944916}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.22260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.22260","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22260","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.5696080923080444,"display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Urban":[0],"transportation":[1,33,174],"systems":[2,34],"face":[3],"growing":[4],"safety":[5],"challenges":[6],"that":[7,204],"require":[8],"scalable":[9],"intelligence":[10],"for":[11,65,90],"emerging":[12],"smart":[13],"mobility":[14],"infrastructures.":[15],"While":[16],"recent":[17],"advances":[18],"in":[19,31,93,223],"foundation":[20,63,175,221],"models":[21,64,222],"and":[22,29,61,116,131,148,163,187,199],"large-scale":[23,86],"multimodal":[24],"datasets":[25],"have":[26],"strengthened":[27],"perception":[28],"reasoning":[30,66,92,137,186,211],"intelligent":[32],"(ITS),":[35],"existing":[36,220],"research":[37],"remains":[38],"largely":[39],"centered":[40],"on":[41,197,209],"microscopic":[42,184],"autonomous":[43],"driving":[44],"(AD),":[45],"with":[46,161],"limited":[47],"attention":[48],"to":[49,142,182],"city-scale":[50],"traffic":[51,95,112,189,226],"analysis.":[52],"In":[53],"particular,":[54],"open-ended":[55,91,210],"safety-oriented":[56],"visual":[57],"question":[58],"answering":[59],"(VQA)":[60],"corresponding":[62],"over":[67,138],"heterogeneous":[68,105],"roadside":[69,106],"camera":[70,129],"observations":[71],"remain":[72],"underexplored.":[73],"To":[74,152],"address":[75],"this":[76],"gap,":[77],"we":[78,156,169],"introduce":[79],"the":[80],"Land":[81],"Transportation":[82],"Dataset":[83],"(LTD),":[84],"a":[85,173,192],"open-source":[87],"vision-language":[88,159],"dataset":[89,120],"urban":[94],"environments.":[96],"LTD":[97,198],"contains":[98],"11.6K":[99],"high-quality":[100],"VQA":[101],"pairs":[102],"collected":[103],"from":[104],"cameras,":[107],"spanning":[108],"diverse":[109,214],"road":[110,150],"geometries,":[111],"participants,":[113],"illumination":[114],"conditions,":[115],"adverse":[117],"weather.":[118],"The":[119],"integrates":[121],"three":[122],"complementary":[123],"tasks:":[124],"fine-grained":[125],"multi-object":[126],"grounding,":[127],"multi-image":[128,132],"selection,":[130],"risk":[133],"analysis,":[134],"requiring":[135],"joint":[136],"minimally":[139],"correlated":[140],"views":[141],"infer":[143],"hazardous":[144],"objects,":[145],"contributing":[146],"factors,":[147],"risky":[149],"directions.":[151],"ensure":[153],"annotation":[154],"fidelity,":[155],"combine":[157],"multi-model":[158],"generation":[160],"cross-validation":[162],"human-in-the-loop":[164],"refinement.":[165],"Building":[166],"upon":[167],"LTD,":[168],"further":[170],"propose":[171],"UniVLT,":[172],"model":[176],"trained":[177],"via":[178],"curriculum-based":[179],"knowledge":[180],"transfer":[181],"unify":[183],"AD":[185,201],"macroscopic":[188],"analysis":[190],"within":[191],"single":[193],"architecture.":[194],"Extensive":[195],"experiments":[196],"multiple":[200],"benchmarks":[202],"demonstrate":[203],"UniVLT":[205],"achieves":[206],"SOTA":[207],"performance":[208],"tasks":[212],"across":[213],"domains,":[215],"while":[216],"exposing":[217],"limitations":[218],"of":[219],"complex":[224],"multi-view":[225],"scenarios.":[227]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-04-28T00:00:00"}
