{"id":"https://openalex.org/W4206080608","doi":"https://doi.org/10.1109/bigdata52589.2021.9671853","title":"Scraping Unstructured Data to Explore the Relationship between Rainfall Anomalies and Vector-Borne Disease Outbreaks","display_name":"Scraping Unstructured Data to Explore the Relationship between Rainfall Anomalies and Vector-Borne Disease Outbreaks","publication_year":2021,"publication_date":"2021-12-15","ids":{"openalex":"https://openalex.org/W4206080608","doi":"https://doi.org/10.1109/bigdata52589.2021.9671853"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata52589.2021.9671853","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671853","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103810002","display_name":"Ethan Joseph","orcid":null},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ethan Joseph","raw_affiliation_strings":["Department of Computer Science, Rensselaer Polytechnic Institute, Troy, NY"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Rensselaer Polytechnic Institute, Troy, NY","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055253153","display_name":"Thilanka Munasinghe","orcid":"https://orcid.org/0000-0002-0911-750X"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thilanka Munasinghe","raw_affiliation_strings":["Department of Information Technology and Web Science, Rensselaer Polytechnic Institute, roy, NY"],"affiliations":[{"raw_affiliation_string":"Department of Information Technology and Web Science, Rensselaer Polytechnic Institute, roy, NY","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026138341","display_name":"Heidi Tubbs","orcid":null},"institutions":[{"id":"https://openalex.org/I1306266525","display_name":"Goddard Space Flight Center","ror":"https://ror.org/0171mag52","country_code":"US","type":"facility","lineage":["https://openalex.org/I1306266525","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Heidi Tubbs","raw_affiliation_strings":["University Space Research Association, Goddard Earth Science and Technology Research & Biospheric Sciences Laboratory, Code 618 NASA/Goddard Space Flight Center, Greenbelt, MD"],"affiliations":[{"raw_affiliation_string":"University Space Research Association, Goddard Earth Science and Technology Research & Biospheric Sciences Laboratory, Code 618 NASA/Goddard Space Flight Center, Greenbelt, MD","institution_ids":["https://openalex.org/I1306266525"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091813779","display_name":"Bhaskar Bishnoi","orcid":null},"institutions":[{"id":"https://openalex.org/I1306266525","display_name":"Goddard Space Flight Center","ror":"https://ror.org/0171mag52","country_code":"US","type":"facility","lineage":["https://openalex.org/I1306266525","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bhaskar Bishnoi","raw_affiliation_strings":["University Space Research Association, Goddard Earth Science and Technology Research & Biospheric Sciences Laboratory, Code 618 NASA/Goddard Space Flight Center, Greenbelt, MD"],"affiliations":[{"raw_affiliation_string":"University Space Research Association, Goddard Earth Science and Technology Research & Biospheric Sciences Laboratory, Code 618 NASA/Goddard Space Flight Center, Greenbelt, MD","institution_ids":["https://openalex.org/I1306266525"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083158713","display_name":"Assaf Anyamba","orcid":"https://orcid.org/0000-0003-0932-9585"},"institutions":[{"id":"https://openalex.org/I1306266525","display_name":"Goddard Space Flight Center","ror":"https://ror.org/0171mag52","country_code":"US","type":"facility","lineage":["https://openalex.org/I1306266525","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Assaf Anyamba","raw_affiliation_strings":["University Space Research Association, Goddard Earth Science and Technology Research & Biospheric Sciences Laboratory, Code 618 NASA/Goddard Space Flight Center, Greenbelt, MD"],"affiliations":[{"raw_affiliation_string":"University Space Research Association, Goddard Earth Science and Technology Research & Biospheric Sciences Laboratory, Code 618 NASA/Goddard Space Flight Center, Greenbelt, MD","institution_ids":["https://openalex.org/I1306266525"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5103810002"],"corresponding_institution_ids":["https://openalex.org/I165799507"],"apc_list":null,"apc_paid":null,"fwci":1.8417,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.86614173,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"4156","last_page":"4164"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12047","display_name":"Viral Infections and Vectors","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/2725","display_name":"Infectious Diseases"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T12047","display_name":"Viral Infections and Vectors","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/2725","display_name":"Infectious Diseases"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.9811000227928162,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11581","display_name":"Viral Infections and Outbreaks Research","score":0.9704999923706055,"subfield":{"id":"https://openalex.org/subfields/2725","display_name":"Infectious Diseases"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/outbreak","display_name":"Outbreak","score":0.6647547483444214},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5664480924606323},{"id":"https://openalex.org/keywords/vector","display_name":"Vector (molecular biology)","score":0.4301599860191345},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3894844055175781},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.34978345036506653},{"id":"https://openalex.org/keywords/virology","display_name":"Virology","score":0.13934090733528137},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.13456329703330994}],"concepts":[{"id":"https://openalex.org/C116675565","wikidata":"https://www.wikidata.org/wiki/Q3241045","display_name":"Outbreak","level":2,"score":0.6647547483444214},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5664480924606323},{"id":"https://openalex.org/C92087593","wikidata":"https://www.wikidata.org/wiki/Q427389","display_name":"Vector (molecular biology)","level":4,"score":0.4301599860191345},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3894844055175781},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.34978345036506653},{"id":"https://openalex.org/C159047783","wikidata":"https://www.wikidata.org/wiki/Q7215","display_name":"Virology","level":1,"score":0.13934090733528137},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.13456329703330994},{"id":"https://openalex.org/C40767141","wikidata":"https://www.wikidata.org/wiki/Q285697","display_name":"Recombinant DNA","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata52589.2021.9671853","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671853","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6200000047683716,"display_name":"Climate action","id":"https://metadata.un.org/sdg/13"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320337789","display_name":"Armed Forces Health Surveillance Branch","ror":null},{"id":"https://openalex.org/F4320338440","display_name":"HORIZON EUROPE Health","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1574447377","https://openalex.org/W1673310716","https://openalex.org/W1959277991","https://openalex.org/W1980410354","https://openalex.org/W2017539781","https://openalex.org/W2066033351","https://openalex.org/W2074705379","https://openalex.org/W2081635696","https://openalex.org/W2095897464","https://openalex.org/W2102580718","https://openalex.org/W2103235794","https://openalex.org/W2105260527","https://openalex.org/W2117239687","https://openalex.org/W2120467164","https://openalex.org/W2125310307","https://openalex.org/W2130534289","https://openalex.org/W2552618985","https://openalex.org/W2606974598","https://openalex.org/W2621012555","https://openalex.org/W2731541882","https://openalex.org/W2769222961","https://openalex.org/W2949488362","https://openalex.org/W2995633738","https://openalex.org/W2998574456","https://openalex.org/W3005917722","https://openalex.org/W3034999214","https://openalex.org/W3098163157","https://openalex.org/W3115058362","https://openalex.org/W3137269758","https://openalex.org/W4230698883","https://openalex.org/W6637131181","https://openalex.org/W6769311223","https://openalex.org/W6773617766","https://openalex.org/W6903664821"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2373635223","https://openalex.org/W2412355096","https://openalex.org/W1990012352","https://openalex.org/W2431766951","https://openalex.org/W4385969441","https://openalex.org/W127458931","https://openalex.org/W2362266265","https://openalex.org/W3028429280"],"abstract_inverted_index":{"According":[0],"to":[1,24,99,137,182],"the":[2,34,53,76,101,118,149,184,191,198,211,219],"World":[3],"Health":[4],"Organization":[5],"(WHO),":[6],"vector-borne":[7,37,57,81,194,205],"diseases":[8,38],"such":[9],"as":[10],"malaria":[11],"and":[12,22,32,63,74,78,96,105,133,145,200,213,238],"dengue":[13],"account":[14],"for":[15,117,171,236],"17%":[16],"of":[17,36,47,49,56,80,141,186,193,197],"all":[18],"infectious":[19],"disease":[20,58,82,106,206,234],"cases":[21],"lead":[23],"more":[25],"than":[26],"700,000":[27],"deaths":[28],"per":[29],"year.":[30],"Tracking":[31,73],"predicting":[33,75,239],"spread":[35,79,192],"is":[39],"a":[40,113,126,139,227],"vital":[41],"task":[42],"that":[43,124,204],"could":[44],"save":[45],"hundreds":[46],"thousands":[48],"lives":[50],"annually.":[51],"Oftentimes,":[52],"first":[54],"reports":[55],"outbreaks":[59,83,147,207,214],"occur":[60],"through":[61],"emails":[62],"online":[64,119],"reporting":[65,121],"systems":[66],"long":[67],"before":[68],"they":[69],"are":[70,208,215],"officially":[71],"documented.":[72],"emergence":[77],"requires":[84],"extracting":[85],"data":[86,98,114,202],"from":[87,166],"these":[88],"unstructured":[89],"sources":[90],"in":[91,159,230],"combination":[92],"with":[93,161],"historical":[94],"weather":[95],"climate":[97],"understand":[100,183],"underlying":[102],"background":[103],"triggers":[104],"dynamics.":[107],"In":[108],"this":[109],"work,":[110],"we":[111],"develop":[112],"extraction":[115],"pipeline":[116],"outbreak":[120],"website":[122],"ProMED-mail":[123,199],"utilizes":[125],"web":[127],"scraper,":[128],"transformer":[129],"neural":[130],"network":[131],"summarizer,":[132],"named":[134],"entity":[135],"recognizer":[136],"obtain":[138],"dataset":[140,155,224],"malaria,":[142],"dengue,":[143],"zika,":[144],"chikungunya":[146],"over":[148],"last":[150],"30":[151],"years.":[152],"This":[153,178],"scraped":[154,223],"was":[156,181],"further":[157],"analyzed":[158],"association":[160],"global":[162,187],"rainfall":[163,188],"anomalies":[164],"derived":[165],"NASA\u2019s":[167],"Integrated":[168],"Multi-satellitE":[169],"Retrievals":[170],"GPM":[172,201],"[Global":[173],"Precipitation":[174],"Mission]":[175],"(IMERG)":[176],"dataset.":[177],"preliminary":[179],"analysis":[180],"effect":[185],"patterns":[189],"on":[190],"diseases.":[195],"Analysis":[196],"shows":[203],"clustered":[209],"towards":[210],"tropics":[212],"often":[216],"amplified":[217],"during":[218],"rainy":[220],"seasons.":[221],"Our":[222],"can":[225],"be":[226],"valuable":[228],"tool":[229],"creating":[231],"comprehensive":[232],"georeferenced":[233],"records":[235],"modeling":[237],"future":[240],"outbreaks.":[241]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
