{"id":"https://openalex.org/W4391307071","doi":"https://doi.org/10.1109/vcip59821.2023.10402669","title":"Enhanced Pedestrian Trajectory Prediction via the Cross-Modal Feature Fusion Transformer","display_name":"Enhanced Pedestrian Trajectory Prediction via the Cross-Modal Feature Fusion Transformer","publication_year":2023,"publication_date":"2023-12-04","ids":{"openalex":"https://openalex.org/W4391307071","doi":"https://doi.org/10.1109/vcip59821.2023.10402669"},"language":"en","primary_location":{"id":"doi:10.1109/vcip59821.2023.10402669","is_oa":false,"landing_page_url":"https://doi.org/10.1109/vcip59821.2023.10402669","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Visual Communications and Image Processing (VCIP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101014167","display_name":"Rashid Ali","orcid":"https://orcid.org/0000-0002-6240-7805"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Rashid Ali","raw_affiliation_strings":["National Yang Ming Chiao Tung University,Department of Computer Science,Hsinchu,Taiwan","Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University,Department of Computer Science,Hsinchu,Taiwan","institution_ids":["https://openalex.org/I148366613"]},{"raw_affiliation_string":"Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019927566","display_name":"Hsu-Feng Hsiao","orcid":"https://orcid.org/0000-0003-3414-5622"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hsu-Feng Hsiao","raw_affiliation_strings":["National Yang Ming Chiao Tung University,Department of Computer Science,Hsinchu,Taiwan","Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University,Department of Computer Science,Hsinchu,Taiwan","institution_ids":["https://openalex.org/I148366613"]},{"raw_affiliation_string":"Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2067,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.52264151,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10370","display_name":"Traffic and Road Safety","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/2213","display_name":"Safety, Risk, Reliability and Quality"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pedestrian","display_name":"Pedestrian","score":0.7019427418708801},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6377200484275818},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5959769487380981},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5947080254554749},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.5530654191970825},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.49443769454956055},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48111480474472046},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4411832094192505},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.34879589080810547},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.22720634937286377},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.1340613067150116},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.13071224093437195},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.10415038466453552},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.07462611794471741}],"concepts":[{"id":"https://openalex.org/C2777113093","wikidata":"https://www.wikidata.org/wiki/Q221488","display_name":"Pedestrian","level":2,"score":0.7019427418708801},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6377200484275818},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5959769487380981},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5947080254554749},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5530654191970825},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.49443769454956055},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48111480474472046},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4411832094192505},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34879589080810547},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.22720634937286377},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1340613067150116},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.13071224093437195},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.10415038466453552},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.07462611794471741},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C22212356","wikidata":"https://www.wikidata.org/wiki/Q775325","display_name":"Transport engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/vcip59821.2023.10402669","is_oa":false,"landing_page_url":"https://doi.org/10.1109/vcip59821.2023.10402669","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Visual Communications and Image Processing (VCIP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.6800000071525574}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1970206276","https://openalex.org/W2098699644","https://openalex.org/W2142996775","https://openalex.org/W2424778531","https://openalex.org/W2607296803","https://openalex.org/W2787091153","https://openalex.org/W2916798096","https://openalex.org/W2951870359","https://openalex.org/W2963001155","https://openalex.org/W2963150697","https://openalex.org/W2985871763","https://openalex.org/W3035339264","https://openalex.org/W3044021461","https://openalex.org/W3108908812","https://openalex.org/W3116651890","https://openalex.org/W3160050461","https://openalex.org/W3204219722","https://openalex.org/W4214593147","https://openalex.org/W4306801729"],"related_works":["https://openalex.org/W2392100589","https://openalex.org/W2512789322","https://openalex.org/W2101960027","https://openalex.org/W2197846993","https://openalex.org/W1976827262","https://openalex.org/W49697837","https://openalex.org/W3122828758","https://openalex.org/W2170799233","https://openalex.org/W2768112316","https://openalex.org/W4205958986"],"abstract_inverted_index":{"We":[0,164],"address":[1],"the":[2,16,39,52,78,108,126,129,139,155],"challenge":[3],"of":[4,21,32,38,42,54,111,116,142,158],"predicting":[5],"pedestrian":[6,87],"trajectories":[7,33],"in":[8,45,68],"videos,":[9],"a":[10,35,83,133],"task":[11],"inherently":[12],"complex":[13],"due":[14],"to":[15,105,153,179],"diverse":[17],"and":[18,24,62,101,121,160,170,173],"intricate":[19],"nature":[20],"human":[22,59,97],"motion":[23],"interactions":[25,127],"within":[26],"their":[27,66],"environment.":[28],"The":[29],"accurate":[30],"anticipation":[31],"necessitates":[34],"holistic":[36],"comprehension":[37],"temporal":[40,109,156],"evolution":[41,157],"past":[43,159],"events":[44],"videos.":[46],"Regrettably,":[47],"existing":[48],"methods":[49],"often":[50],"neglect":[51],"fusion":[53,119],"critical":[55],"features,":[56,95],"such":[57],"as":[58],"behavior,":[60,98],"motion,":[61],"interaction,":[63],"thereby":[64],"limiting":[65],"efficacy":[67],"tackling":[69],"these":[70,74],"challenges.":[71],"To":[72],"overcome":[73],"limitations,":[75],"we":[76,146],"propose":[77],"Cross-modal":[79],"Feature":[80],"Fusion":[81],"Transformer,":[82],"novel":[84],"approach":[85,167],"for":[86],"trajectory":[88],"prediction.":[89],"Our":[90],"model":[91],"seamlessly":[92],"integrates":[93],"multimodal":[94,130,162],"including":[96],"position,":[99],"speed,":[100],"interaction":[102],"with":[103],"surroundings,":[104],"effectively":[106],"encapsulate":[107],"progression":[110],"observed":[112],"frames.":[113],"It":[114],"consists":[115],"transformer-based":[117],"cross-modal":[118],"encoder":[120],"decoder":[122],"modules,":[123],"adeptly":[124],"melding":[125],"between":[128],"features":[131],"through":[132],"multi-head":[134],"co-attentional":[135],"mechanism.":[136],"This":[137],"enables":[138],"precise":[140],"prediction":[141,151],"future":[143,150,161],"trajectories.":[144],"Additionally,":[145],"incorporate":[147],"auxiliary":[148],"self-supervised":[149],"losses":[152],"learn":[154],"features.":[163],"evaluate":[165],"our":[166],"on":[168],"ETH/UCY":[169],"ActEV/VIRAT":[171],"datasets":[172],"demonstrate":[174],"its":[175],"superior":[176],"performance":[177],"compared":[178],"state-of-the-art":[180],"methods.":[181]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
