{"id":"https://openalex.org/W4401863947","doi":"https://doi.org/10.1145/3637528.3671629","title":"FNSPID: A Comprehensive Financial News Dataset in Time Series","display_name":"FNSPID: A Comprehensive Financial News Dataset in Time Series","publication_year":2024,"publication_date":"2024-08-24","ids":{"openalex":"https://openalex.org/W4401863947","doi":"https://doi.org/10.1145/3637528.3671629"},"language":"en","primary_location":{"id":"doi:10.1145/3637528.3671629","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3637528.3671629","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101282550","display_name":"Zihan Dong","orcid":"https://orcid.org/0000-0003-4079-7520"},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zihan Dong","raw_affiliation_strings":["North Carolina State University, Raleigh, NC, USA"],"affiliations":[{"raw_affiliation_string":"North Carolina State University, Raleigh, NC, USA","institution_ids":["https://openalex.org/I137902535"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102216621","display_name":"Fan Xinyu","orcid":"https://orcid.org/0009-0000-8416-7948"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xinyu Fan","raw_affiliation_strings":["SiChuan University, Chengdu, Sichuan Province, China"],"affiliations":[{"raw_affiliation_string":"SiChuan University, Chengdu, Sichuan Province, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5106713617","display_name":"Zhiyuan Peng","orcid":"https://orcid.org/0000-0002-5133-9237"},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhiyuan Peng","raw_affiliation_strings":["North Carolina State University, Raleigh, NC, USA"],"affiliations":[{"raw_affiliation_string":"North Carolina State University, Raleigh, NC, USA","institution_ids":["https://openalex.org/I137902535"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101282550"],"corresponding_institution_ids":["https://openalex.org/I137902535"],"apc_list":null,"apc_paid":null,"fwci":17.4934,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.9941421,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"4918","last_page":"4927"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11326","display_name":"Stock Market Forecasting Methods","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11326","display_name":"Stock Market Forecasting Methods","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11270","display_name":"Complex Systems and Time Series Analysis","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/series","display_name":"Series (stratigraphy)","score":0.7225050330162048},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6152687072753906},{"id":"https://openalex.org/keywords/time-series","display_name":"Time series","score":0.549872875213623},{"id":"https://openalex.org/keywords/finance","display_name":"Finance","score":0.5342053174972534},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.16210809350013733},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.15539312362670898}],"concepts":[{"id":"https://openalex.org/C143724316","wikidata":"https://www.wikidata.org/wiki/Q312468","display_name":"Series (stratigraphy)","level":2,"score":0.7225050330162048},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6152687072753906},{"id":"https://openalex.org/C151406439","wikidata":"https://www.wikidata.org/wiki/Q186588","display_name":"Time series","level":2,"score":0.549872875213623},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.5342053174972534},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.16210809350013733},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.15539312362670898},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3637528.3671629","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3637528.3671629","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1491302875","https://openalex.org/W1973784058","https://openalex.org/W2029056003","https://openalex.org/W2112262445","https://openalex.org/W2166215547","https://openalex.org/W2287044641","https://openalex.org/W2290024306","https://openalex.org/W2735895797","https://openalex.org/W2786391746","https://openalex.org/W2801889078","https://openalex.org/W2965771985","https://openalex.org/W2977178908","https://openalex.org/W3009542284","https://openalex.org/W3080310350","https://openalex.org/W3139013678","https://openalex.org/W3170147864","https://openalex.org/W3203680104","https://openalex.org/W4221125416","https://openalex.org/W4230661391","https://openalex.org/W4283574889","https://openalex.org/W4291804489","https://openalex.org/W4296099891","https://openalex.org/W4313830571","https://openalex.org/W4315777292","https://openalex.org/W4317930845","https://openalex.org/W4364320763","https://openalex.org/W4379259169","https://openalex.org/W4382998379","https://openalex.org/W4387618806","https://openalex.org/W4388335799","https://openalex.org/W4390858905"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2622688551","https://openalex.org/W1550175370","https://openalex.org/W1990205660"],"abstract_inverted_index":{"Financial":[0,92],"market":[1,12,55,128,138,165],"predictions":[2,16],"utilize":[3],"historical":[4],"data":[5],"to":[6,67,122,209],"anticipate":[7],"future":[8],"stock":[9,28,104,127,137],"prices":[10,105],"and":[11,34,57,77,94,106,142,161,192,213],"trends.":[13],"Traditionally,":[14],"these":[15],"have":[17],"focused":[18],"on":[19,153,175],"the":[20,46,68,118,158,176,186,205],"statistical":[21],"analysis":[22,49,151],"of":[23,50,70],"quantitative":[24,76],"factors,":[25],"such":[26],"as":[27],"prices,":[29],"trading":[30],"volumes,":[31],"inflation":[32],"rates,":[33],"changes":[35],"in":[36,41,140],"industrial":[37],"production.":[38],"Recent":[39],"advancements":[40],"large":[42],"language":[43],"models":[44],"motivate":[45],"integrated":[47],"financial":[48,89,110,150,206],"both":[51,75],"sentiment":[52,79,147,170],"data,":[53],"particularly":[54],"news,":[56],"numerical":[58],"factors.":[59],"Nonetheless,":[60],"this":[61,83,197],"methodology":[62],"frequently":[63],"encounters":[64],"constraints":[65],"due":[66],"paucity":[69],"extensive":[71],"datasets":[72,139],"that":[73,133,183],"amalgamate":[74],"qualitative":[78],"analyses.":[80],"To":[81],"address":[82],"challenge,":[84],"we":[85,155],"introduce":[86],"a":[87,180],"large-scale":[88],"dataset,":[90],"namely,":[91],"News":[93],"Stock":[95],"Price":[96],"Integration":[97],"Dataset":[98],"(FNSPID).":[99],"It":[100],"comprises":[101],"29.7":[102],"million":[103,108],"15.7":[107],"time-aligned":[109],"news":[111,129],"records":[112],"for":[113,204],"4,775":[114],"S&P500":[115],"companies,":[116],"covering":[117],"period":[119],"from":[120,125],"1999":[121],"2023,":[123],"sourced":[124],"4":[126],"websites.":[130],"We":[131],"demonstrate":[132],"FNSPID":[134,200],"excels":[135],"existing":[136],"scale":[141],"diversity":[143],"while":[144],"uniquely":[145],"incorporating":[146],"information.":[148],"Through":[149],"experiments":[152],"FNSPID,":[154],"propose:":[156],"(1)":[157],"dataset's":[159],"size":[160],"quality":[162],"significantly":[163],"boost":[164],"prediction":[166],"accuracy;":[167],"(2)":[168],"adding":[169],"scores":[171],"modestly":[172],"enhances":[173],"performance":[174],"transformer-based":[177],"model;":[178],"(3)":[179],"reproducible":[181],"procedure":[182],"can":[184],"update":[185],"dataset.":[187],"Completed":[188],"work,":[189],"code,":[190],"documentation,":[191],"examples":[193],"are":[194],"available":[195],"at":[196],"http":[198],"URL.":[199],"offers":[201],"unprecedented":[202],"opportunities":[203],"research":[207],"community":[208],"advance":[210],"predictive":[211],"modeling":[212],"analysis.":[214]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":28},{"year":2024,"cited_by_count":4}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
