{"id":"https://openalex.org/W4414359923","doi":"https://doi.org/10.24963/ijcai.2025/300","title":"Empowering Multimodal Road Traffic Profiling with Vision Language Models and Frequency Spectrum Fusion","display_name":"Empowering Multimodal Road Traffic Profiling with Vision Language Models and Frequency Spectrum Fusion","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414359923","doi":"https://doi.org/10.24963/ijcai.2025/300"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/300","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/300","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003553958","display_name":"Haolong Xiang","orcid":"https://orcid.org/0000-0003-4565-8829"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haolong Xiang","raw_affiliation_strings":["Nanjing University of Information Science and Technology"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Information Science and Technology","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007818509","display_name":"Xiaolong Xu","orcid":"https://orcid.org/0000-0003-4879-9803"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaolong Xu","raw_affiliation_strings":["Nanjing University of Information Science and Technology"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Information Science and Technology","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076258208","display_name":"Guangdong Wang","orcid":"https://orcid.org/0000-0001-8300-5656"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangdong Wang","raw_affiliation_strings":["Nanjing University of Information Science and Technology"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Information Science and Technology","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076120553","display_name":"Xuyun Zhang","orcid":"https://orcid.org/0000-0001-7353-4159"},"institutions":[{"id":"https://openalex.org/I99043593","display_name":"Macquarie University","ror":"https://ror.org/01sf06y89","country_code":"AU","type":"education","lineage":["https://openalex.org/I99043593"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xuyun Zhang","raw_affiliation_strings":["Macquarie University"],"affiliations":[{"raw_affiliation_string":"Macquarie University","institution_ids":["https://openalex.org/I99043593"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071782289","display_name":"Xiaoyong Li","orcid":"https://orcid.org/0000-0002-6580-1648"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyong Li","raw_affiliation_strings":["National University of Defense Technology"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100360241","display_name":"Qi Zhang","orcid":"https://orcid.org/0000-0001-9595-5640"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Zhang","raw_affiliation_strings":["Tongji University"],"affiliations":[{"raw_affiliation_string":"Tongji University","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056293251","display_name":"Amin Beheshti","orcid":"https://orcid.org/0000-0002-5988-5494"},"institutions":[{"id":"https://openalex.org/I99043593","display_name":"Macquarie University","ror":"https://ror.org/01sf06y89","country_code":"AU","type":"education","lineage":["https://openalex.org/I99043593"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Amin Beheshti","raw_affiliation_strings":["Macquarie University"],"affiliations":[{"raw_affiliation_string":"Macquarie University","institution_ids":["https://openalex.org/I99043593"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5116668175","display_name":"Wei Fan","orcid":"https://orcid.org/0000-0001-7656-445X"},"institutions":[{"id":"https://openalex.org/I154130895","display_name":"University of Auckland","ror":"https://ror.org/03b94tp07","country_code":"NZ","type":"education","lineage":["https://openalex.org/I154130895"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Wei Fan","raw_affiliation_strings":["University of Auckland"],"affiliations":[{"raw_affiliation_string":"University of Auckland","institution_ids":["https://openalex.org/I154130895"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5003553958"],"corresponding_institution_ids":["https://openalex.org/I200845125"],"apc_list":null,"apc_paid":null,"fwci":5.6662,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.96044745,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"2694","last_page":"2702"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9247999787330627,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9247999787330627,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.6245999932289124},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.46299999952316284},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.45249998569488525},{"id":"https://openalex.org/keywords/advanced-driver-assistance-systems","display_name":"Advanced driver assistance systems","score":0.41769999265670776},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.383899986743927},{"id":"https://openalex.org/keywords/floating-car-data","display_name":"Floating car data","score":0.3589000105857849},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.3458000123500824},{"id":"https://openalex.org/keywords/road-traffic","display_name":"Road traffic","score":0.33959999680519104},{"id":"https://openalex.org/keywords/multimodal-interaction","display_name":"Multimodal interaction","score":0.3391999900341034}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7709000110626221},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.6245999932289124},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5277000069618225},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.46299999952316284},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.45249998569488525},{"id":"https://openalex.org/C87833898","wikidata":"https://www.wikidata.org/wiki/Q1060280","display_name":"Advanced driver assistance systems","level":2,"score":0.41769999265670776},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.383899986743927},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.383899986743927},{"id":"https://openalex.org/C64093975","wikidata":"https://www.wikidata.org/wiki/Q356677","display_name":"Floating car data","level":3,"score":0.3589000105857849},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3474000096321106},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3458000123500824},{"id":"https://openalex.org/C2985695025","wikidata":"https://www.wikidata.org/wiki/Q4323994","display_name":"Road traffic","level":2,"score":0.33959999680519104},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.3391999900341034},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.33880001306533813},{"id":"https://openalex.org/C2781317605","wikidata":"https://www.wikidata.org/wiki/Q7832483","display_name":"Traffic analysis","level":2,"score":0.3370000123977661},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.33559998869895935},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.3327000141143799},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.31209999322891235},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2957000136375427},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2939999997615814},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C47796450","wikidata":"https://www.wikidata.org/wiki/Q508378","display_name":"Intelligent transportation system","level":2,"score":0.28200000524520874},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C19118579","wikidata":"https://www.wikidata.org/wiki/Q786423","display_name":"Frequency domain","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C2779888511","wikidata":"https://www.wikidata.org/wiki/Q244156","display_name":"Traffic congestion","level":2,"score":0.2727000117301941},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2581000030040741},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/300","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/300","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,5,41,63,67,74,90,119,146,157,166,170,188,217],"rapid":[2],"urbanization":[3],"in":[4,23,83],"modern":[6],"era,":[7],"smart":[8],"traffic":[9,28,38,85,96,113,120,152,185],"profiling":[10,39],"based":[11],"on":[12,40,52,62,208],"multimodal":[13,71,106,163,191],"sources":[14],"of":[15,73,95,150],"data":[16,164,192],"has":[17],"been":[18,80],"playing":[19],"a":[20,104,175],"significant":[21],"role":[22],"ensuring":[24],"safe":[25],"travel,":[26],"reducing":[27],"congestion":[29],"and":[30,70,76,108,134,173,199],"optimizing":[31],"urban":[32],"mobility.":[33],"Most":[34],"existing":[35],"methods":[36],"for":[37,111,140,184],"road":[42,84,112,151],"level":[43],"usually":[44],"utilize":[45],"single-modality":[46],"data,":[47],"i.e.,":[48],"they":[49],"mainly":[50],"focus":[51],"image":[53],"processing":[54],"with":[55,202,216],"deep":[56],"vision":[57],"models":[58],"or":[59,93],"auxiliary":[60],"analysis":[61],"textual":[64,75],"data.":[65],"However,":[66],"joint":[68],"modeling":[69],"fusion":[72,109,197],"visual":[77],"modalities":[78],"have":[79],"rarely":[81],"studied":[82],"profiling,":[86,114],"which":[87],"largely":[88],"hinders":[89],"accurate":[91],"prediction":[92],"classification":[94],"conditions.":[97],"To":[98],"address":[99],"this":[100,142],"issue,":[101],"we":[102,155],"propose":[103],"novel":[105],"learning":[107],"framework":[110,124],"named":[115],"TraffiCFUS.":[116],"Specifically,":[117],"given":[118],"images,":[121],"our":[122],"TraffiCFUS":[123],"first":[125],"introduces":[126],"Vision":[127],"Language":[128],"Models":[129],"(VLMs)":[130],"to":[131,145,161,169,179,195],"generate":[132,196],"text":[133,143],"then":[135],"creates":[136],"tailored":[137],"prompt":[138],"instructions":[139],"refining":[141],"according":[144],"specific":[147],"scene":[148],"requirements":[149],"profiling.":[153,186],"Next,":[154],"apply":[156],"discrete":[158],"Fourier":[159],"transform":[160,178],"convert":[162],"from":[165],"spatial":[167,190],"domain":[168,172],"frequency":[171],"perform":[174],"cross-modal":[176],"spectrum":[177],"filter":[180],"out":[181],"irrelevant":[182],"information":[183],"Furthermore,":[187],"processed":[189],"is":[193],"combined":[194],"loss":[198,201],"interaction":[200],"contrastive":[203],"learning.":[204],"Finally,":[205],"extensive":[206],"experiments":[207],"four":[209],"real-world":[210],"datasets":[211],"illustrate":[212],"superior":[213],"performance":[214],"compared":[215],"state-of-the-art":[218],"approaches.":[219]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
