{"id":"https://openalex.org/W7131637079","doi":"https://doi.org/10.48550/arxiv.2602.21550","title":"Extending Sequence Length is Not All You Need: Effective Integration of Multimodal Signals for Gene Expression Prediction","display_name":"Extending Sequence Length is Not All You Need: Effective Integration of Multimodal Signals for Gene Expression Prediction","publication_year":2026,"publication_date":"2026-02-25","ids":{"openalex":"https://openalex.org/W7131637079","doi":"https://doi.org/10.48550/arxiv.2602.21550"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.21550","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126900609","display_name":"Zhao Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yang, Zhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126898856","display_name":"Yi Duan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Yi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126901935","display_name":"Jiwei Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jiwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126949671","display_name":"Ying Ba","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ba, Ying","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126948317","display_name":"Chuan Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Chuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126899714","display_name":"Bing Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Bing","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5126900609"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10222","display_name":"Genomics and Chromatin Dynamics","score":0.5361999869346619,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10222","display_name":"Genomics and Chromatin Dynamics","score":0.5361999869346619,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.3610999882221222,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.03240000084042549,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.7876999974250793},{"id":"https://openalex.org/keywords/epigenomics","display_name":"Epigenomics","score":0.6757000088691711},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5827999711036682},{"id":"https://openalex.org/keywords/concatenation","display_name":"Concatenation (mathematics)","score":0.5720000267028809},{"id":"https://openalex.org/keywords/chromatin","display_name":"Chromatin","score":0.5511000156402588},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.44609999656677246},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.41359999775886536},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.3668000102043152}],"concepts":[{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.7876999974250793},{"id":"https://openalex.org/C121912465","wikidata":"https://www.wikidata.org/wiki/Q3589153","display_name":"Epigenomics","level":5,"score":0.6757000088691711},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5827999711036682},{"id":"https://openalex.org/C87619178","wikidata":"https://www.wikidata.org/wiki/Q126002","display_name":"Concatenation (mathematics)","level":2,"score":0.5720000267028809},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5625},{"id":"https://openalex.org/C83640560","wikidata":"https://www.wikidata.org/wiki/Q180951","display_name":"Chromatin","level":3,"score":0.5511000156402588},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.5238999724388123},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.44609999656677246},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.41359999775886536},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3873000144958496},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.3668000102043152},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3578000068664551},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.34880000352859497},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.32179999351501465},{"id":"https://openalex.org/C2781023610","wikidata":"https://www.wikidata.org/wiki/Q17006304","display_name":"Burstiness","level":3,"score":0.31470000743865967},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.30140000581741333},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C165864922","wikidata":"https://www.wikidata.org/wiki/Q411391","display_name":"Regulation of gene expression","level":3,"score":0.27570000290870667},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.275299996137619},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.27160000801086426},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.2615000009536743},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2572999894618988},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.25450000166893005},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.2535000145435333},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.251800000667572}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.21550","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.21550","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.21550","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.21550","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Gene":[0],"expression":[1,6,184],"prediction,":[2],"which":[3,27,88],"predicts":[4],"mRNA":[5],"levels":[7],"from":[8,32],"DNA":[9],"sequences,":[10],"presents":[11],"significant":[12],"challenges.":[13],"Previous":[14],"works":[15],"often":[16],"focus":[17,80],"on":[18,81],"extending":[19],"input":[20],"sequence":[21,46],"length":[22],"to":[23,83,125,150,160],"locate":[24],"distal":[25],"enhancers,":[26],"may":[28,116,122],"influence":[29],"target":[30,73],"genes":[31,74],"hundreds":[33],"of":[34,146,171],"kilobases":[35],"away.":[36],"Our":[37,164],"work":[38],"first":[39],"reveals":[40],"that":[41,67,94,115,142,168],"for":[42,182],"current":[43],"models,":[44],"long":[45,62],"modeling":[47,170],"can":[48],"decrease":[49],"performance.":[50],"Even":[51],"carefully":[52],"designed":[53],"algorithms":[54],"only":[55,179],"mitigate":[56,161],"the":[57],"performance":[58,177],"degradation":[59],"caused":[60],"by":[61],"sequences.":[63],"Instead,":[64],"we":[65,79,137],"find":[66,93],"proximal":[68],"multimodal":[69,172],"epigenomic":[70,148,173],"signals":[71,174],"near":[72],"prove":[75],"more":[76],"essential.":[77],"Hence":[78],"how":[82],"better":[84],"integrate":[85],"these":[86,130],"signals,":[87],"has":[89],"been":[90],"overlooked.":[91],"We":[92],"different":[95],"signal":[96],"types":[97],"serve":[98],"distinct":[99,152],"biological":[100],"roles,":[101],"with":[102,129],"some":[103],"directly":[104],"marking":[105],"active":[106],"regulatory":[107],"elements":[108],"while":[109],"others":[110],"reflect":[111],"background":[112,131,153],"chromatin":[113,154],"patterns":[114],"introduce":[117],"confounding":[118,162],"effects.":[119,163],"Simple":[120],"concatenation":[121],"lead":[123],"models":[124],"develop":[126],"spurious":[127],"associations":[128],"patterns.":[132],"To":[133],"address":[134],"this":[135],"challenge,":[136],"propose":[138],"Prism,":[139],"a":[140],"framework":[141],"learns":[143],"multiple":[144],"combinations":[145],"high-dimensional":[147],"features":[149],"represent":[151],"states":[155],"and":[156],"uses":[157],"backdoor":[158],"adjustment":[159],"experimental":[165],"results":[166],"demonstrate":[167],"proper":[169],"achieves":[175],"state-of-the-art":[176],"using":[178],"short":[180],"sequences":[181],"gene":[183],"prediction.":[185]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-27T00:00:00"}
