{"id":"https://openalex.org/W7126211032","doi":"https://doi.org/10.1142/s012906572650019x","title":"Causal Mask in Transformer via Transfer Entropy Estimation from Vector Autoregressive Learning for Multivariate Time Series Forecasting","display_name":"Causal Mask in Transformer via Transfer Entropy Estimation from Vector Autoregressive Learning for Multivariate Time Series Forecasting","publication_year":2026,"publication_date":"2026-01-30","ids":{"openalex":"https://openalex.org/W7126211032","doi":"https://doi.org/10.1142/s012906572650019x","pmid":"https://pubmed.ncbi.nlm.nih.gov/41852315"},"language":"en","primary_location":{"id":"doi:10.1142/s012906572650019x","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s012906572650019x","pdf_url":null,"source":{"id":"https://openalex.org/S197665576","display_name":"International Journal of Neural Systems","issn_l":"0129-0657","issn":["0129-0657","1793-6462"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Neural Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110945747","display_name":"Chengli Zhou","orcid":"https://orcid.org/0000-0002-2852-7096"},"institutions":[{"id":"https://openalex.org/I189210763","display_name":"Yunnan University","ror":"https://ror.org/0040axw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I189210763"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengli Zhou","raw_affiliation_strings":["School of Information Science and Engineering, Yunnan University, South Waihuan Road, University City East, Kunming, Yunnan, P. R. China"],"raw_orcid":"https://orcid.org/0000-0002-2852-7096","affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Yunnan University, South Waihuan Road, University City East, Kunming, Yunnan, P. R. China","institution_ids":["https://openalex.org/I189210763"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zicheng Wang","orcid":"https://orcid.org/0009-0003-7313-9820"},"institutions":[{"id":"https://openalex.org/I4210120238","display_name":"PowerChina (China)","ror":"https://ror.org/01varr368","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210120238"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zicheng Wang","raw_affiliation_strings":["PowerChina Kunming Engineering Corporation Limited, Kunming, Yunnan, P. R. China"],"raw_orcid":"https://orcid.org/0009-0003-7313-9820","affiliations":[{"raw_affiliation_string":"PowerChina Kunming Engineering Corporation Limited, Kunming, Yunnan, P. R. China","institution_ids":["https://openalex.org/I4210120238"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yaqun Huang","orcid":"https://orcid.org/0000-0003-2112-9957"},"institutions":[{"id":"https://openalex.org/I189210763","display_name":"Yunnan University","ror":"https://ror.org/0040axw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I189210763"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaqun Huang","raw_affiliation_strings":["School of Information Science and Engineering, Yunnan University, South Waihuan Road, University City East, Kunming, Yunnan, P. R. China"],"raw_orcid":"https://orcid.org/0000-0003-2112-9957","affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Yunnan University, South Waihuan Road, University City East, Kunming, Yunnan, P. R. China","institution_ids":["https://openalex.org/I189210763"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Dapeng Tao","orcid":"https://orcid.org/0000-0003-0783-5273"},"institutions":[{"id":"https://openalex.org/I189210763","display_name":"Yunnan University","ror":"https://ror.org/0040axw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I189210763"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dapeng Tao","raw_affiliation_strings":["School of Information Science and Engineering, Yunnan University, South Waihuan Road, University City East, Kunming, Yunnan, P. R. China"],"raw_orcid":"https://orcid.org/0000-0003-0783-5273","affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Yunnan University, South Waihuan Road, University City East, Kunming, Yunnan, P. R. China","institution_ids":["https://openalex.org/I189210763"]}]},{"author_position":"last","author":{"id":null,"display_name":"Chunna Zhao","orcid":"https://orcid.org/0000-0002-0019-1041"},"institutions":[{"id":"https://openalex.org/I189210763","display_name":"Yunnan University","ror":"https://ror.org/0040axw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I189210763"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunna Zhao","raw_affiliation_strings":["School of Information Science and Engineering, Yunnan University, South Waihuan Road, University City East, Kunming, Yunnan, P. R. China"],"raw_orcid":"https://orcid.org/0000-0002-0019-1041","affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Yunnan University, South Waihuan Road, University City East, Kunming, Yunnan, P. R. China","institution_ids":["https://openalex.org/I189210763"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12877628,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"36","issue":"08","first_page":"2650019","last_page":"2650019"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.25760000944137573,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.25760000944137573,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11344","display_name":"Traffic Prediction and Management Techniques","score":0.13490000367164612,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.09690000116825104,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transfer-entropy","display_name":"Transfer entropy","score":0.7038999795913696},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.6129000186920166},{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.5651000142097473},{"id":"https://openalex.org/keywords/causal-inference","display_name":"Causal inference","score":0.5291000008583069},{"id":"https://openalex.org/keywords/time-series","display_name":"Time series","score":0.46389999985694885},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4564000070095062},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.44749999046325684},{"id":"https://openalex.org/keywords/multivariate-statistics","display_name":"Multivariate statistics","score":0.4226999878883362},{"id":"https://openalex.org/keywords/autocorrelation","display_name":"Autocorrelation","score":0.34470000863075256}],"concepts":[{"id":"https://openalex.org/C182049051","wikidata":"https://www.wikidata.org/wiki/Q17147155","display_name":"Transfer entropy","level":3,"score":0.7038999795913696},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6402999758720398},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.6129000186920166},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.5651000142097473},{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.5291000008583069},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46459999680519104},{"id":"https://openalex.org/C151406439","wikidata":"https://www.wikidata.org/wiki/Q186588","display_name":"Time series","level":2,"score":0.46389999985694885},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.461899995803833},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4564000070095062},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.44749999046325684},{"id":"https://openalex.org/C161584116","wikidata":"https://www.wikidata.org/wiki/Q1952580","display_name":"Multivariate statistics","level":2,"score":0.4226999878883362},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34769999980926514},{"id":"https://openalex.org/C5297727","wikidata":"https://www.wikidata.org/wiki/Q786970","display_name":"Autocorrelation","level":2,"score":0.34470000863075256},{"id":"https://openalex.org/C129824826","wikidata":"https://www.wikidata.org/wiki/Q2630107","display_name":"Granger causality","level":2,"score":0.34369999170303345},{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.3411000072956085},{"id":"https://openalex.org/C143724316","wikidata":"https://www.wikidata.org/wiki/Q312468","display_name":"Series (stratigraphy)","level":2,"score":0.3411000072956085},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3160000145435333},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3084999918937683},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.305400013923645},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.2987000048160553},{"id":"https://openalex.org/C133029050","wikidata":"https://www.wikidata.org/wiki/Q385593","display_name":"Vector autoregression","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C39927690","wikidata":"https://www.wikidata.org/wiki/Q11197","display_name":"Logarithm","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.28119999170303345},{"id":"https://openalex.org/C163504300","wikidata":"https://www.wikidata.org/wiki/Q2364925","display_name":"Causal structure","level":2,"score":0.2777000069618225},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.26829999685287476},{"id":"https://openalex.org/C9936470","wikidata":"https://www.wikidata.org/wiki/Q6510405","display_name":"Least-squares function approximation","level":3,"score":0.25940001010894775}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D005544","descriptor_name":"Forecasting","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D005544","descriptor_name":"Forecasting","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019277","descriptor_name":"Entropy","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019277","descriptor_name":"Entropy","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.1142/s012906572650019x","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s012906572650019x","pdf_url":null,"source":{"id":"https://openalex.org/S197665576","display_name":"International Journal of Neural Systems","issn_l":"0129-0657","issn":["0129-0657","1793-6462"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Neural Systems","raw_type":"journal-article"},{"id":"pmid:41852315","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41852315","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International journal of neural systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8760024905204773,"display_name":"Climate action","id":"https://metadata.un.org/sdg/13"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W2004731943","https://openalex.org/W2028723065","https://openalex.org/W2029510783","https://openalex.org/W2143117649","https://openalex.org/W2736832651","https://openalex.org/W2921440296","https://openalex.org/W2966126335","https://openalex.org/W3027660952","https://openalex.org/W3037979079","https://openalex.org/W3116046270","https://openalex.org/W4286579687","https://openalex.org/W4311770445","https://openalex.org/W4362472185","https://openalex.org/W4382281637","https://openalex.org/W4387542285","https://openalex.org/W4388011400","https://openalex.org/W4389306213","https://openalex.org/W4389937725","https://openalex.org/W4390754477","https://openalex.org/W4392267336","https://openalex.org/W4392946498","https://openalex.org/W4393251322","https://openalex.org/W4393319450","https://openalex.org/W4393864814","https://openalex.org/W4396599589","https://openalex.org/W4402351715","https://openalex.org/W4402930846","https://openalex.org/W4403600985","https://openalex.org/W4404452634","https://openalex.org/W4404562820","https://openalex.org/W4405031167","https://openalex.org/W4407574945","https://openalex.org/W4407864597","https://openalex.org/W4408010720","https://openalex.org/W4408537280","https://openalex.org/W4408949334","https://openalex.org/W4409796428","https://openalex.org/W4411633188","https://openalex.org/W4411732612","https://openalex.org/W4413007325"],"related_works":[],"abstract_inverted_index":{"Time":[0],"series":[1],"forecasting":[2,26],"remains":[3],"challenging":[4],"in":[5,97],"domains":[6],"such":[7],"as":[8],"finance":[9],"and":[10,49,66,77],"climate":[11],"science,":[12],"where":[13],"complex":[14],"interactions":[15],"among":[16],"variables":[17],"often":[18],"induce":[19],"spurious":[20],"correlations.":[21],"We":[22],"propose":[23],"ARCausal,":[24],"a":[25,41],"framework":[27],"that":[28],"integrates":[29],"transfer":[30],"entropy":[31],"(TE)-based":[32],"causal":[33,43,59,71,112],"discovery":[34],"with":[35],"Transformer":[36],"attention":[37],"modeling.":[38],"ARCausal":[39],"introduces":[40],"sparse":[42],"masking":[44],"mechanism":[45],"derived":[46],"from":[47,69],"TE":[48],"refined":[50],"via":[51],"vector":[52],"autoregression":[53],"(VAR)":[54],"estimation":[55],"to":[56,92],"capture":[57],"dynamic":[58],"interactions.":[60],"The":[61,114],"mask":[62],"suppresses":[63],"noninformative":[64],"dependencies":[65],"distinguishes":[67],"autocorrelation":[68],"cross-variable":[70],"effects,":[72],"improving":[73],"both":[74],"predictive":[75],"performance":[76],"interpretability.":[78],"Experiments":[79],"on":[80],"nine":[81],"benchmark":[82],"datasets":[83],"demonstrate":[84],"consistent":[85],"improvements":[86],"over":[87],"strong":[88],"baselines,":[89],"achieving":[90],"up":[91],"[Formula:":[93],"see":[94],"text]":[95],"reduction":[96],"MSE":[98],"while":[99],"maintaining":[100],"computational":[101],"efficiency.":[102],"Visualization":[103],"results":[104],"further":[105],"illustrate":[106],"the":[107,110],"interpretability":[108],"of":[109],"learned":[111],"structures.":[113],"code":[115],"is":[116],"publicly":[117],"available":[118],"at":[119],"https://github.com/jancely/ARCausal/.":[120]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-01T00:00:00"}
