{"id":"https://openalex.org/W3113497636","doi":"https://doi.org/10.1109/spw50608.2020.00066","title":"Research Report: Building a Wide Reach Corpus for Secure Parser Development","display_name":"Research Report: Building a Wide Reach Corpus for Secure Parser Development","publication_year":2020,"publication_date":"2020-05-01","ids":{"openalex":"https://openalex.org/W3113497636","doi":"https://doi.org/10.1109/spw50608.2020.00066","mag":"3113497636"},"language":"en","primary_location":{"id":"doi:10.1109/spw50608.2020.00066","is_oa":true,"landing_page_url":"https://doi.org/10.1109/spw50608.2020.00066","pdf_url":"https://ieeexplore.ieee.org/ielx7/9283745/9283819/09283855.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE Security and Privacy Workshops (SPW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/9283745/9283819/09283855.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028020012","display_name":"Tim Allison","orcid":null},"institutions":[{"id":"https://openalex.org/I1334627681","display_name":"Jet Propulsion Laboratory","ror":"https://ror.org/027k65916","country_code":"US","type":"facility","lineage":["https://openalex.org/I122411786","https://openalex.org/I1334627681","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tim Allison","raw_affiliation_strings":["Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California"],"affiliations":[{"raw_affiliation_string":"Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California","institution_ids":["https://openalex.org/I1334627681"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075299522","display_name":"Wayne Burke","orcid":"https://orcid.org/0009-0008-9163-7627"},"institutions":[{"id":"https://openalex.org/I1334627681","display_name":"Jet Propulsion Laboratory","ror":"https://ror.org/027k65916","country_code":"US","type":"facility","lineage":["https://openalex.org/I122411786","https://openalex.org/I1334627681","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wayne Burke","raw_affiliation_strings":["Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California"],"affiliations":[{"raw_affiliation_string":"Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California","institution_ids":["https://openalex.org/I1334627681"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046994289","display_name":"Valentino Constantinou","orcid":"https://orcid.org/0000-0002-5279-4143"},"institutions":[{"id":"https://openalex.org/I1334627681","display_name":"Jet Propulsion Laboratory","ror":"https://ror.org/027k65916","country_code":"US","type":"facility","lineage":["https://openalex.org/I122411786","https://openalex.org/I1334627681","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Valentino Constantinou","raw_affiliation_strings":["Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California"],"affiliations":[{"raw_affiliation_string":"Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California","institution_ids":["https://openalex.org/I1334627681"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005795509","display_name":"Edwin Goh","orcid":"https://orcid.org/0000-0002-8445-9358"},"institutions":[{"id":"https://openalex.org/I1334627681","display_name":"Jet Propulsion Laboratory","ror":"https://ror.org/027k65916","country_code":"US","type":"facility","lineage":["https://openalex.org/I122411786","https://openalex.org/I1334627681","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Edwin Goh","raw_affiliation_strings":["Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California"],"affiliations":[{"raw_affiliation_string":"Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California","institution_ids":["https://openalex.org/I1334627681"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103260730","display_name":"Chris A. Mattmann","orcid":null},"institutions":[{"id":"https://openalex.org/I1334627681","display_name":"Jet Propulsion Laboratory","ror":"https://ror.org/027k65916","country_code":"US","type":"facility","lineage":["https://openalex.org/I122411786","https://openalex.org/I1334627681","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chris Mattmann","raw_affiliation_strings":["Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California"],"affiliations":[{"raw_affiliation_string":"Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California","institution_ids":["https://openalex.org/I1334627681"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081006768","display_name":"Anastasija Mensikova","orcid":null},"institutions":[{"id":"https://openalex.org/I1334627681","display_name":"Jet Propulsion Laboratory","ror":"https://ror.org/027k65916","country_code":"US","type":"facility","lineage":["https://openalex.org/I122411786","https://openalex.org/I1334627681","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anastasija Mensikova","raw_affiliation_strings":["Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California"],"affiliations":[{"raw_affiliation_string":"Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California","institution_ids":["https://openalex.org/I1334627681"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074008514","display_name":"Philip Southam","orcid":null},"institutions":[{"id":"https://openalex.org/I1334627681","display_name":"Jet Propulsion Laboratory","ror":"https://ror.org/027k65916","country_code":"US","type":"facility","lineage":["https://openalex.org/I122411786","https://openalex.org/I1334627681","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Philip Southam","raw_affiliation_strings":["Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California"],"affiliations":[{"raw_affiliation_string":"Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California","institution_ids":["https://openalex.org/I1334627681"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029916258","display_name":"Ryan Stonebraker","orcid":"https://orcid.org/0000-0001-8479-0975"},"institutions":[{"id":"https://openalex.org/I1334627681","display_name":"Jet Propulsion Laboratory","ror":"https://ror.org/027k65916","country_code":"US","type":"facility","lineage":["https://openalex.org/I122411786","https://openalex.org/I1334627681","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ryan Stonebraker","raw_affiliation_strings":["Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California"],"affiliations":[{"raw_affiliation_string":"Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California","institution_ids":["https://openalex.org/I1334627681"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027751518","display_name":"Virisha Timmaraju","orcid":"https://orcid.org/0000-0001-5688-646X"},"institutions":[{"id":"https://openalex.org/I1334627681","display_name":"Jet Propulsion Laboratory","ror":"https://ror.org/027k65916","country_code":"US","type":"facility","lineage":["https://openalex.org/I122411786","https://openalex.org/I1334627681","https://openalex.org/I4210124779"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Virisha Timmaraju","raw_affiliation_strings":["Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California"],"affiliations":[{"raw_affiliation_string":"Jet Propulsion Laboratory, California Institute of Technology, Pasadena, California","institution_ids":["https://openalex.org/I1334627681"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5028020012"],"corresponding_institution_ids":["https://openalex.org/I1334627681"],"apc_list":null,"apc_paid":null,"fwci":1.3914,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.87281546,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"12","issue":null,"first_page":"318","last_page":"326"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9886999726295471,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9074041247367859},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.7751270532608032},{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.6267816424369812},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5341730713844299},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.487662672996521},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.46712902188301086},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.4433860182762146},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4398789405822754},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4012860953807831},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.3858001232147217},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32451361417770386},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.283636212348938},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10476282238960266}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9074041247367859},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.7751270532608032},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.6267816424369812},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5341730713844299},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.487662672996521},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.46712902188301086},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.4433860182762146},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4398789405822754},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4012860953807831},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3858001232147217},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32451361417770386},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.283636212348938},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10476282238960266},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/spw50608.2020.00066","is_oa":true,"landing_page_url":"https://doi.org/10.1109/spw50608.2020.00066","pdf_url":"https://ieeexplore.ieee.org/ielx7/9283745/9283819/09283855.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE Security and Privacy Workshops (SPW)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1109/spw50608.2020.00066","is_oa":true,"landing_page_url":"https://doi.org/10.1109/spw50608.2020.00066","pdf_url":"https://ieeexplore.ieee.org/ielx7/9283745/9283819/09283855.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE Security and Privacy Workshops (SPW)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/9"}],"awards":[{"id":"https://openalex.org/G3399935314","display_name":"An open source framework for metadata exploration and discovery of Polar Data","funder_award_id":"1348450","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5755523287","display_name":null,"funder_award_id":"ICER-","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5995532187","display_name":null,"funder_award_id":"ICER-1639753, PLR-1348450 and PLR-144562","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7903834952","display_name":null,"funder_award_id":"XDATA","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G8022370896","display_name":"Earthcube Building Blocks: Collaborative Proposal: Polar Data Insights and Search Analytics for the Deep and Scientific Web","funder_award_id":"1639753","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G978455930","display_name":null,"funder_award_id":"ICER-1639753","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306101","display_name":"National Aeronautics and Space Administration","ror":"https://ror.org/027ka1x80"},{"id":"https://openalex.org/F4320309398","display_name":"California Institute of Technology","ror":"https://ror.org/05dxps055"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3113497636.pdf","grobid_xml":"https://content.openalex.org/works/W3113497636.grobid-xml"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W155037473","https://openalex.org/W155995321","https://openalex.org/W186995053","https://openalex.org/W1136076469","https://openalex.org/W1705363916","https://openalex.org/W1972531058","https://openalex.org/W2030457377","https://openalex.org/W2045941292","https://openalex.org/W2066536534","https://openalex.org/W2076342816","https://openalex.org/W2084264686","https://openalex.org/W2107997840","https://openalex.org/W2123442489","https://openalex.org/W2250741443","https://openalex.org/W2618146768","https://openalex.org/W2761152219","https://openalex.org/W2906830849","https://openalex.org/W2910550356","https://openalex.org/W4294029071","https://openalex.org/W6606406289","https://openalex.org/W6691127737","https://openalex.org/W6745208960"],"related_works":["https://openalex.org/W2355730523","https://openalex.org/W152021879","https://openalex.org/W2365629437","https://openalex.org/W2072918937","https://openalex.org/W2023935927","https://openalex.org/W2348330439","https://openalex.org/W2350372928","https://openalex.org/W2377292126","https://openalex.org/W3046984657","https://openalex.org/W2053943328"],"abstract_inverted_index":{"Computer":[0],"software":[1,41],"that":[2,128],"parses":[3],"electronic":[4],"files":[5],"is":[6],"often":[7],"vulnerable":[8],"to":[9,19,67,93,106,134],"maliciously":[10],"crafted":[11],"input":[12,37],"data.":[13],"Rather":[14],"than":[15],"relying":[16],"on":[17,116],"developers":[18,56],"implement":[20],"ad":[21],"hoc":[22],"defenses":[23],"against":[24],"such":[25],"data,":[26],"the":[27,40,76,85,142],"Language-theoretic":[28],"security":[29],"(LangSec)":[30],"philosophy":[31],"offers":[32],"formally":[33],"correct":[34],"and":[35,101,108,118,125],"verifiable":[36],"handling":[38],"throughout":[39],"development":[42],"lifecycle.":[43],"Whether":[44],"developing":[45],"from":[46,52,75],"a":[47,137],"specification":[48],"or":[49,72],"deriving":[50],"parsers":[51],"samples,":[53],"LangSec":[54,145],"parser":[55,146],"require":[57],"wide-reach":[58],"corpora":[59],"of":[60,87,120,144],"their":[61],"target":[62],"file":[63,139],"format":[64],"in":[65,110],"order":[66],"identify":[68],"key":[69],"edge":[70],"cases":[71],"common":[73],"deviations":[74],"format's":[77],"specification.":[78],"In":[79],"this":[80],"research":[81],"report,":[82],"we":[83,90,113],"provide":[84,114],"details":[86],"several":[88],"methods":[89],"have":[91],"used":[92],"gather":[94,136],"approximately":[95],"30":[96],"million":[97],"files,":[98],"extract":[99],"features":[100,104],"make":[102],"these":[103],"amenable":[105],"search":[107],"use":[109],"analytics.":[111],"Additionally,":[112],"documentation":[115],"opportunities":[117],"limitations":[119],"some":[121],"popular":[122],"open-source":[123],"datasets":[124],"annotation":[126],"tools":[127],"will":[129],"benefit":[130],"researchers":[131],"which":[132],"need":[133],"efficiently":[135],"large":[138],"corpus":[140],"for":[141],"purposes":[143],"development.":[147]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
