{"id":"https://openalex.org/W3137599801","doi":"https://doi.org/10.1109/bigdata50022.2020.9378388","title":"Large Scale Financial Filing Analysis on HPCC Systems","display_name":"Large Scale Financial Filing Analysis on HPCC Systems","publication_year":2020,"publication_date":"2020-12-10","ids":{"openalex":"https://openalex.org/W3137599801","doi":"https://doi.org/10.1109/bigdata50022.2020.9378388","mag":"3137599801"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata50022.2020.9378388","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9378388","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032656104","display_name":"Matthias Murray","orcid":"https://orcid.org/0009-0005-3285-1760"},"institutions":[{"id":"https://openalex.org/I4056153","display_name":"New College of Florida","ror":"https://ror.org/01cbya385","country_code":"US","type":"education","lineage":["https://openalex.org/I4056153"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Matthias Murray","raw_affiliation_strings":["LexisNexis Risk Solutions, New College of Florida, Sarasota, Florida, USA"],"affiliations":[{"raw_affiliation_string":"LexisNexis Risk Solutions, New College of Florida, Sarasota, Florida, USA","institution_ids":["https://openalex.org/I4056153"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080518580","display_name":"Arjuna Chala","orcid":null},"institutions":[{"id":"https://openalex.org/I4210094785","display_name":"Lexicon Pharmaceuticals (United States)","ror":"https://ror.org/00v64s089","country_code":"US","type":"company","lineage":["https://openalex.org/I4210094785"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arjuna Chala","raw_affiliation_strings":["LexisNexis Risk Solutions, Atlanta, Georgia, USA"],"affiliations":[{"raw_affiliation_string":"LexisNexis Risk Solutions, Atlanta, Georgia, USA","institution_ids":["https://openalex.org/I4210094785"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026776669","display_name":"Lili Xu","orcid":"https://orcid.org/0000-0002-7766-9808"},"institutions":[{"id":"https://openalex.org/I4210094785","display_name":"Lexicon Pharmaceuticals (United States)","ror":"https://ror.org/00v64s089","country_code":"US","type":"company","lineage":["https://openalex.org/I4210094785"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lili Xu","raw_affiliation_strings":["LexisNexis Risk Solutions, Atlanta, Georgia, USA"],"affiliations":[{"raw_affiliation_string":"LexisNexis Risk Solutions, Atlanta, Georgia, USA","institution_ids":["https://openalex.org/I4210094785"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033177104","display_name":"Roger Dev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roger Dev","raw_affiliation_strings":["LexisNexis Risk Solutions, Denver, Colorado, USA"],"affiliations":[{"raw_affiliation_string":"LexisNexis Risk Solutions, Denver, Colorado, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5032656104"],"corresponding_institution_ids":["https://openalex.org/I4056153"],"apc_list":null,"apc_paid":null,"fwci":0.5386,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.72692171,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"4429","last_page":"4436"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11326","display_name":"Stock Market Forecasting Methods","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11326","display_name":"Stock Market Forecasting Methods","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10047","display_name":"Financial Markets and Investment Strategies","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/2003","display_name":"Finance"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11653","display_name":"Financial Distress and Bankruptcy Prediction","score":0.9718000292778015,"subfield":{"id":"https://openalex.org/subfields/1402","display_name":"Accounting"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7356201410293579},{"id":"https://openalex.org/keywords/sentiment-analysis","display_name":"Sentiment analysis","score":0.610962450504303},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5761349201202393},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4625043570995331},{"id":"https://openalex.org/keywords/tracing","display_name":"Tracing","score":0.45539018511772156},{"id":"https://openalex.org/keywords/finance","display_name":"Finance","score":0.37357673048973083},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36788395047187805},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3099210262298584},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.189388245344162}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7356201410293579},{"id":"https://openalex.org/C66402592","wikidata":"https://www.wikidata.org/wiki/Q2271421","display_name":"Sentiment analysis","level":2,"score":0.610962450504303},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5761349201202393},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4625043570995331},{"id":"https://openalex.org/C138673069","wikidata":"https://www.wikidata.org/wiki/Q322229","display_name":"Tracing","level":2,"score":0.45539018511772156},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.37357673048973083},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36788395047187805},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3099210262298584},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.189388245344162},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata50022.2020.9378388","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9378388","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5199999809265137,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1259090559","https://openalex.org/W2099238550","https://openalex.org/W2250886571","https://openalex.org/W2464243336","https://openalex.org/W2542080616","https://openalex.org/W2592713985","https://openalex.org/W2970636124","https://openalex.org/W3012033942","https://openalex.org/W3102772997","https://openalex.org/W3125715108","https://openalex.org/W4252402383","https://openalex.org/W4300485781","https://openalex.org/W6767182473"],"related_works":["https://openalex.org/W2548633793","https://openalex.org/W3013279174","https://openalex.org/W2941935829","https://openalex.org/W2596247554","https://openalex.org/W3132372214","https://openalex.org/W4224284088","https://openalex.org/W4286571989","https://openalex.org/W2765903680","https://openalex.org/W4317653575","https://openalex.org/W2381399653"],"abstract_inverted_index":{"Insights":[0],"from":[1,22],"public":[2],"companies'":[3],"financial":[4,53,60,105,127],"filings":[5,24,106,230],"are":[6],"necessary":[7],"for":[8,144],"securities":[9],"analysts":[10],"and":[11,69,110,117,157,176,203,233],"investors":[12],"to":[13,75,96,121,133,162,200,267],"make":[14],"the":[15,33,63,66,80,98,159,185,190,212,239,242],"right":[16],"investment":[17],"decisions.":[18],"Synthesizing":[19],"salient":[20],"facts":[21],"such":[23],"is":[25,36,85,131,198,207,216,223,260],"a":[26,52,108,122,183,196,234],"complex":[27],"language":[28],"task,":[29],"especially":[30],"now":[31],"as":[32,195,209,211],"data":[34,68,161],"volume":[35],"growing":[37],"at":[38],"an":[39],"overwhelming":[40],"pace.":[41],"To":[42,169],"ease":[43],"human":[44],"labor":[45],"in":[46,73,107,165,262],"this":[47],"process,":[48],"our":[49,129,221],"work":[50],"proposed":[51],"filing":[54,142],"analysis":[55,72,187,244],"pipeline":[56,84,130],"which":[57],"automatically":[58],"scrapes":[59],"filings,":[61,128],"generates":[62],"embeddings":[64,153],"of":[65,79,89,100,104,125,137,192,214,237,241],"contextual":[67],"performs":[70],"sentiment":[71,171,186,243],"order":[74],"predict":[76],"future":[77],"performance":[78],"underlying":[81],"companies.":[82],"The":[83],"built":[86],"on":[87,226],"top":[88],"Big":[90],"Data":[91],"processing":[92,101],"platform":[93],"HPCC":[94],"Systems":[95],"enable":[97],"capability":[99],"large":[102,123],"amounts":[103],"scalable":[109],"timely":[111],"manner.":[112],"By":[113],"applying":[114],"word":[115],"embedding":[116,164],"machine":[118],"learning":[119],"models":[120],"amount":[124],"SEC":[126,180,269],"able":[132],"process":[134],"20":[135],"GB":[136],"XBRL":[138],"files":[139],"--":[140,149],"5,000":[141,179,227],"documents":[143],"more":[145],"than":[146],"3,500":[147],"companies":[148],"into":[150],"50,000":[151],"sentence":[152],"within":[154],"5":[155],"minutes":[156],"transform":[158],"same":[160],"TF-IDF":[163],"about":[166],"8":[167],"minutes.":[168],"test":[170],"analysis,":[172],"we":[173],"randomly":[174],"sampled":[175],"manually":[177,228],"labeled":[178,229],"filings.":[181,270],"As":[182],"result,":[184],"suggested":[188],"that":[189,258],"usefulness":[191],"stock":[193],"price":[194],"metric":[197],"specific":[199],"each":[201],"industry":[202],"overall":[204],"market,":[205],"but":[206],"usable":[208],"long":[210],"scope":[213],"inquiry":[215],"sufficiently":[217],"narrow.":[218],"Additionally,":[219],"while":[220],"model":[222],"trained":[224],"only":[225],"with":[231],"unigrams":[232],"final":[235],"loss":[236],"0.09,":[238],"results":[240],"exhibited":[245],"discriminatory":[246],"power":[247],"exceeding":[248],"na\u00efve":[249],"label":[250],"selection":[251],"through":[252],"random":[253],"or":[254],"biased":[255],"choice,":[256],"suggesting":[257],"there":[259],"efficacy":[261],"using":[263],"Natural":[264],"Language":[265],"Processing":[266],"analyze":[268]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
