{"id":"https://openalex.org/W4414297883","doi":"https://doi.org/10.1061/jccee5.cpeng-6634","title":"Advancing Traffic Safety Analysis: A Novel Lightweight Rule-Based and Part-of-Speech Tagging-Based Approach for Information Extraction from Crash Reports","display_name":"Advancing Traffic Safety Analysis: A Novel Lightweight Rule-Based and Part-of-Speech Tagging-Based Approach for Information Extraction from Crash Reports","publication_year":2025,"publication_date":"2025-09-17","ids":{"openalex":"https://openalex.org/W4414297883","doi":"https://doi.org/10.1061/jccee5.cpeng-6634"},"language":"en","primary_location":{"id":"doi:10.1061/jccee5.cpeng-6634","is_oa":false,"landing_page_url":"https://doi.org/10.1061/jccee5.cpeng-6634","pdf_url":null,"source":{"id":"https://openalex.org/S176637136","display_name":"Journal of Computing in Civil Engineering","issn_l":"0887-3801","issn":["0887-3801","1943-5487"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315747","host_organization_name":"American Society of Civil Engineers","host_organization_lineage":["https://openalex.org/P4310315747"],"host_organization_lineage_names":["American Society of Civil Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computing in Civil Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108324200","display_name":"Jingyi Lai","orcid":null},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jingyi Lai","raw_affiliation_strings":["Purdue Univ"],"raw_orcid":"https://orcid.org/0009-0005-0648-6026","affiliations":[{"raw_affiliation_string":"Purdue Univ","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028874288","display_name":"Fan Yang","orcid":"https://orcid.org/0000-0001-9842-719X"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fan Yang","raw_affiliation_strings":["Purdue Univ"],"raw_orcid":"https://orcid.org/0000-0001-9842-719X","affiliations":[{"raw_affiliation_string":"Purdue Univ","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100455168","display_name":"Hang Li","orcid":"https://orcid.org/0009-0009-4701-7380"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hang Li","raw_affiliation_strings":["Purdue Univ"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Purdue Univ","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100630296","display_name":"Jiansong Zhang","orcid":"https://orcid.org/0000-0001-5225-5943"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiansong Zhang","raw_affiliation_strings":["Purdue Univ"],"raw_orcid":"https://orcid.org/0000-0001-5225-5943","affiliations":[{"raw_affiliation_string":"Purdue Univ","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011085340","display_name":"Yiheng Feng","orcid":"https://orcid.org/0000-0001-5656-3222"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiheng Feng","raw_affiliation_strings":["Purdue Univ"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Purdue Univ","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033523598","display_name":"Tianfang Han","orcid":"https://orcid.org/0000-0002-3959-8199"},"institutions":[{"id":"https://openalex.org/I155093810","display_name":"University of Idaho","ror":"https://ror.org/03hbp5t65","country_code":"US","type":"education","lineage":["https://openalex.org/I155093810"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianfang Han","raw_affiliation_strings":["Univ. of Idaho"],"raw_orcid":"https://orcid.org/0000-0002-3959-8199","affiliations":[{"raw_affiliation_string":"Univ. of Idaho","institution_ids":["https://openalex.org/I155093810"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102947870","display_name":"Yunfeng Chen","orcid":"https://orcid.org/0000-0002-7093-7590"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yunfeng Chen","raw_affiliation_strings":["Purdue Univ"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Purdue Univ","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5108324200"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12582164,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9790999889373779,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/crash","display_name":"Crash","score":0.8586999773979187},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5303999781608582},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.5149000287055969},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.48750001192092896},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.42579999566078186},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4180999994277954},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.40540000796318054},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3962000012397766},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.3806999921798706}],"concepts":[{"id":"https://openalex.org/C183469790","wikidata":"https://www.wikidata.org/wiki/Q333501","display_name":"Crash","level":2,"score":0.8586999773979187},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6816999912261963},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5303999781608582},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.5149000287055969},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.48750001192092896},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.42579999566078186},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4180999994277954},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.40540000796318054},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3962000012397766},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3806999921798706},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3743000030517578},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.36169999837875366},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.35010001063346863},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.34630000591278076},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3452000021934509},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3370000123977661},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.32330000400543213},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.31200000643730164},{"id":"https://openalex.org/C3017944768","wikidata":"https://www.wikidata.org/wiki/Q1450463","display_name":"Poison control","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3050000071525574},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2879999876022339},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.2727000117301941},{"id":"https://openalex.org/C87833898","wikidata":"https://www.wikidata.org/wiki/Q1060280","display_name":"Advanced driver assistance systems","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.25769999623298645},{"id":"https://openalex.org/C132943942","wikidata":"https://www.wikidata.org/wiki/Q2562511","display_name":"Footprint","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2526000142097473}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1061/jccee5.cpeng-6634","is_oa":false,"landing_page_url":"https://doi.org/10.1061/jccee5.cpeng-6634","pdf_url":null,"source":{"id":"https://openalex.org/S176637136","display_name":"Journal of Computing in Civil Engineering","issn_l":"0887-3801","issn":["0887-3801","1943-5487"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315747","host_organization_name":"American Society of Civil Engineers","host_organization_lineage":["https://openalex.org/P4310315747"],"host_organization_lineage_names":["American Society of Civil Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computing in Civil Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W182831726","https://openalex.org/W1958077162","https://openalex.org/W1967622519","https://openalex.org/W1993834644","https://openalex.org/W2066023832","https://openalex.org/W2111614279","https://openalex.org/W2114276948","https://openalex.org/W2137079713","https://openalex.org/W2159636537","https://openalex.org/W2163514362","https://openalex.org/W2347658236","https://openalex.org/W2426119782","https://openalex.org/W2527259164","https://openalex.org/W2557995968","https://openalex.org/W2765909865","https://openalex.org/W2768488789","https://openalex.org/W2782973335","https://openalex.org/W2793451807","https://openalex.org/W2906230340","https://openalex.org/W2912647660","https://openalex.org/W2931789502","https://openalex.org/W2938933709","https://openalex.org/W2944105112","https://openalex.org/W2956093012","https://openalex.org/W2964318837","https://openalex.org/W2970819616","https://openalex.org/W2989610764","https://openalex.org/W3012016325","https://openalex.org/W3168090906","https://openalex.org/W3171501718","https://openalex.org/W3198433705","https://openalex.org/W4308009568","https://openalex.org/W4312199925","https://openalex.org/W4313543240","https://openalex.org/W4323363472","https://openalex.org/W4382397550","https://openalex.org/W4383426985","https://openalex.org/W4385745939","https://openalex.org/W4385798398","https://openalex.org/W4387526446","https://openalex.org/W4392456307","https://openalex.org/W4399890313"],"related_works":[],"abstract_inverted_index":{"Road":[0],"safety":[1,245],"remains":[2],"a":[3,72,110,239],"critical":[4],"issue":[5],"as":[6],"traffic":[7,46,90],"accidents":[8],"continue":[9],"to":[10,41,75,148,167,197],"rise.":[11],"Analyzing":[12],"crash":[13,47,62,91,106,115,233],"reports":[14,116],"is":[15],"vital":[16],"for":[17,67,117,121,159,192,242],"understanding":[18],"accident":[19],"causation":[20],"and":[21,58,84,101,119,143,174,186,200],"implementing":[22],"preventative":[23],"measures.":[24],"In":[25],"this":[26],"research,":[27],"we":[28],"focused":[29,96],"on":[30,78,97,179],"developing":[31],"an":[32],"information":[33,77,231],"extraction":[34,153],"system":[35],"utilizing":[36,162],"natural":[37,80],"language":[38],"processing":[39],"(NLP)":[40],"enhance":[42],"the":[43,55,136,156,180,193,209,212,217],"interpretation":[44],"of":[45,60,103,113,126,170,211,219],"reports.":[48,107],"While":[49],"standardized":[50],"forms":[51],"offer":[52],"basic":[53],"information,":[54],"unique":[56],"details":[57],"contexts":[59],"each":[61],"require":[63],"more":[64],"advanced":[65],"techniques":[66,222],"comprehensive":[68],"analysis.":[69,246],"We":[70,108],"employed":[71,155],"rule-based":[73,94,220],"approach":[74,95],"extract":[76,149],"unstructured":[79],"language,":[81],"emphasizing":[82],"syntactic":[83],"light":[85],"semantic":[86],"feature":[87],"recognition":[88],"in":[89,105,228],"narratives.":[92,235],"The":[93,152],"extracting":[98,229],"subjects,":[99],"actions,":[100],"objects":[102],"events":[104],"prepared":[109],"data":[111,182],"set":[112],"80":[114],"training":[118,137],"20":[120],"testing":[122,181],"from":[123,135,232],"Michigan":[124],"Office":[125],"Highway":[127],"Safety":[128],"Planning.":[129],"A":[130],"new":[131],"ruleset":[132],"was":[133],"developed":[134],"data,":[138,194],"incorporating":[139],"part-of-speech":[140],"(POS)":[141],"tagging":[142,172,227],"sentence":[144,175],"structure":[145],"patterns":[146],"matching":[147],"target":[150],"information.":[151],"process":[154],"General":[157],"Architecture":[158],"Text":[160],"Engineering,":[161],"its":[163],"essential":[164],"NLP":[165,221],"resources":[166],"find":[168],"matches":[169],"POS":[171,226],"features":[173],"structures":[176],"effectively.":[177],"Experiments":[178],"demonstrated":[183],"95.4%":[184],"precision":[185,199],"86.9%":[187],"recall":[188,202],"without":[189],"typos/grammar":[190,204],"correction":[191],"with":[195,203],"improvement":[196],"96.7%":[198],"90.16%":[201],"correction.":[205],"These":[206],"results":[207],"outperformed":[208],"state":[210],"art":[213],"including":[214],"ChatGPT-4o,":[215],"highlighting":[216],"potential":[218],"by":[223],"mainly":[224],"using":[225],"key":[230],"report":[234],"This":[236],"research":[237],"offers":[238],"robust":[240],"tool":[241],"improving":[243],"road":[244]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
