{"id":"https://openalex.org/W4226365456","doi":"https://doi.org/10.1109/tcss.2022.3159677","title":"Natural Language Processing Applied to Forensics Information Extraction With Transformers and Graph Visualization","display_name":"Natural Language Processing Applied to Forensics Information Extraction With Transformers and Graph Visualization","publication_year":2022,"publication_date":"2022-04-05","ids":{"openalex":"https://openalex.org/W4226365456","doi":"https://doi.org/10.1109/tcss.2022.3159677"},"language":"en","primary_location":{"id":"doi:10.1109/tcss.2022.3159677","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcss.2022.3159677","pdf_url":null,"source":{"id":"https://openalex.org/S2490693980","display_name":"IEEE Transactions on Computational Social Systems","issn_l":"2329-924X","issn":["2329-924X","2373-7476"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computational Social Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013346913","display_name":"Fillipe Barros Rodrigues","orcid":"https://orcid.org/0000-0002-0288-5987"},"institutions":[{"id":"https://openalex.org/I4210116315","display_name":"Bio-Bras","ror":"https://ror.org/01ng1nj07","country_code":"BR","type":"education","lineage":["https://openalex.org/I4210116315"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Fillipe Barros Rodrigues","raw_affiliation_strings":["Department of Electrical Engineering, Professional Post-Graduate Program in Electrical Engineering, University of Bras&#x00ED;lia, Bras&#x00ED;lia, Brazil","Professional Post-Graduate Program in Electrical Engineering, Department of Electrical Engineering, University of Bras&#x00ED;lia, Bras&#x00ED;lia 70.910-900, Brazil"],"raw_orcid":"https://orcid.org/0000-0002-0288-5987","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Professional Post-Graduate Program in Electrical Engineering, University of Bras&#x00ED;lia, Bras&#x00ED;lia, Brazil","institution_ids":[]},{"raw_affiliation_string":"Professional Post-Graduate Program in Electrical Engineering, Department of Electrical Engineering, University of Bras&#x00ED;lia, Bras&#x00ED;lia 70.910-900, Brazil","institution_ids":["https://openalex.org/I4210116315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070528120","display_name":"William Ferreira Giozza","orcid":"https://orcid.org/0000-0002-3003-3458"},"institutions":[{"id":"https://openalex.org/I4210116315","display_name":"Bio-Bras","ror":"https://ror.org/01ng1nj07","country_code":"BR","type":"education","lineage":["https://openalex.org/I4210116315"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"William Ferreira Giozza","raw_affiliation_strings":["Department of Electrical Engineering, Professional Post-Graduate Program in Electrical Engineering, University of Bras&#x00ED;lia, Bras&#x00ED;lia, Brazil","Professional Post-Graduate Program in Electrical Engineering, Department of Electrical Engineering, University of Bras&#x00ED;lia, Bras&#x00ED;lia 70.910-900, Brazil"],"raw_orcid":"https://orcid.org/0000-0002-3003-3458","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Professional Post-Graduate Program in Electrical Engineering, University of Bras&#x00ED;lia, Bras&#x00ED;lia, Brazil","institution_ids":[]},{"raw_affiliation_string":"Professional Post-Graduate Program in Electrical Engineering, Department of Electrical Engineering, University of Bras&#x00ED;lia, Bras&#x00ED;lia 70.910-900, Brazil","institution_ids":["https://openalex.org/I4210116315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000009381","display_name":"Robson de Oliveira Albuquerque","orcid":"https://orcid.org/0000-0002-6717-3374"},"institutions":[{"id":"https://openalex.org/I121748325","display_name":"Universidad Complutense de Madrid","ror":"https://ror.org/02p0gd045","country_code":"ES","type":"education","lineage":["https://openalex.org/I121748325"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Robson de Oliveira Albuquerque","raw_affiliation_strings":["Department of Electrical Engineering, Professional Post-Graduate Program in Electrical Engineering, University of Bras&#x00ED;lia, Bras&#x00ED;lia, Brazil","Professional Post-Graduate Program in Electrical Engineering, Department of Electrical Engineering, University of Bras&#x00ED;lia, Bras&#x00ED;lia 70.910-900, Brazil, and also with the Group of Analysis, Security and Systems (GASS), Department of Software Engineering and Artificial Intelligence (DISIA), Faculty of Computer Science and Engineering, Universidad Complutense de Madrid (UCM), 28040 Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0002-6717-3374","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Professional Post-Graduate Program in Electrical Engineering, University of Bras&#x00ED;lia, Bras&#x00ED;lia, Brazil","institution_ids":[]},{"raw_affiliation_string":"Professional Post-Graduate Program in Electrical Engineering, Department of Electrical Engineering, University of Bras&#x00ED;lia, Bras&#x00ED;lia 70.910-900, Brazil, and also with the Group of Analysis, Security and Systems (GASS), Department of Software Engineering and Artificial Intelligence (DISIA), Faculty of Computer Science and Engineering, Universidad Complutense de Madrid (UCM), 28040 Madrid, Spain","institution_ids":["https://openalex.org/I121748325"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086946465","display_name":"Luis Javier Garc\u00eda Villalba","orcid":"https://orcid.org/0000-0001-7573-6272"},"institutions":[{"id":"https://openalex.org/I121748325","display_name":"Universidad Complutense de Madrid","ror":"https://ror.org/02p0gd045","country_code":"ES","type":"education","lineage":["https://openalex.org/I121748325"]},{"id":"https://openalex.org/I97750245","display_name":"Software (Spain)","ror":"https://ror.org/02ethns06","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210087817","https://openalex.org/I97750245"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Luis Javier Garc\u00eda Villalba","raw_affiliation_strings":["Department of Software Engineering and Artificial Intelligence (DISIA), Faculty of Computer Science and Engineering, Group of Analysis, Security and Systems (GASS), Universidad Complutense de Madrid (UCM), Madrid, Spain","Group of Analysis, Security and Systems (GASS), Department of Software Engineering and Artificial Intelligence (DISIA), Faculty of Computer Science and Engineering, Universidad Complutense de Madrid (UCM), 28040 Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0001-7573-6272","affiliations":[{"raw_affiliation_string":"Department of Software Engineering and Artificial Intelligence (DISIA), Faculty of Computer Science and Engineering, Group of Analysis, Security and Systems (GASS), Universidad Complutense de Madrid (UCM), Madrid, Spain","institution_ids":["https://openalex.org/I121748325","https://openalex.org/I97750245"]},{"raw_affiliation_string":"Group of Analysis, Security and Systems (GASS), Department of Software Engineering and Artificial Intelligence (DISIA), Faculty of Computer Science and Engineering, Universidad Complutense de Madrid (UCM), 28040 Madrid, Spain","institution_ids":["https://openalex.org/I121748325","https://openalex.org/I97750245"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5013346913"],"corresponding_institution_ids":["https://openalex.org/I4210116315"],"apc_list":null,"apc_paid":null,"fwci":7.3377,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.97186048,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"11","issue":"4","first_page":"4727","last_page":"4743"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8698680400848389},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.7257731556892395},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5773389935493469},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.5765140056610107},{"id":"https://openalex.org/keywords/relationship-extraction","display_name":"Relationship extraction","score":0.5748484134674072},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.5313580632209778},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5287845730781555},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5246118307113647},{"id":"https://openalex.org/keywords/digital-forensics","display_name":"Digital forensics","score":0.48960453271865845},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4873126447200775},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.465433269739151},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4363534450531006},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.41265496611595154},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4010918140411377},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3928203284740448},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.3825705051422119},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.13328394293785095}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8698680400848389},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.7257731556892395},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5773389935493469},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.5765140056610107},{"id":"https://openalex.org/C153604712","wikidata":"https://www.wikidata.org/wiki/Q7310755","display_name":"Relationship extraction","level":3,"score":0.5748484134674072},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.5313580632209778},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5287845730781555},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5246118307113647},{"id":"https://openalex.org/C84418412","wikidata":"https://www.wikidata.org/wiki/Q3246940","display_name":"Digital forensics","level":2,"score":0.48960453271865845},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4873126447200775},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.465433269739151},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4363534450531006},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.41265496611595154},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4010918140411377},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3928203284740448},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3825705051422119},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.13328394293785095},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcss.2022.3159677","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcss.2022.3159677","pdf_url":null,"source":{"id":"https://openalex.org/S2490693980","display_name":"IEEE Transactions on Computational Social Systems","issn_l":"2329-924X","issn":["2329-924X","2373-7476"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computational Social Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7099999785423279}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W1446195290","https://openalex.org/W1623072288","https://openalex.org/W1967185460","https://openalex.org/W2110760901","https://openalex.org/W2120844411","https://openalex.org/W2144578941","https://openalex.org/W2578030757","https://openalex.org/W2724484220","https://openalex.org/W2759211898","https://openalex.org/W2760505947","https://openalex.org/W2770291346","https://openalex.org/W2811447695","https://openalex.org/W2888185184","https://openalex.org/W2906691146","https://openalex.org/W2911489562","https://openalex.org/W2949493644","https://openalex.org/W2953356739","https://openalex.org/W2962897570","https://openalex.org/W2963167649","https://openalex.org/W2963777632","https://openalex.org/W2964022985","https://openalex.org/W2965373594","https://openalex.org/W2970550868","https://openalex.org/W2970959783","https://openalex.org/W2994442779","https://openalex.org/W2996284762","https://openalex.org/W3011411500","https://openalex.org/W3035053871","https://openalex.org/W3080100402","https://openalex.org/W3096266342","https://openalex.org/W3101904655","https://openalex.org/W3104186312","https://openalex.org/W3104415840","https://openalex.org/W3175875420","https://openalex.org/W3176762756","https://openalex.org/W3183276505","https://openalex.org/W6601885229","https://openalex.org/W6632404493","https://openalex.org/W6675410526","https://openalex.org/W6683166357","https://openalex.org/W6740723061","https://openalex.org/W6755207826","https://openalex.org/W6760837195","https://openalex.org/W6768851824","https://openalex.org/W6770212877","https://openalex.org/W6771917389","https://openalex.org/W6784025715","https://openalex.org/W6785462510","https://openalex.org/W6790620667","https://openalex.org/W6794872663"],"related_works":["https://openalex.org/W2334378031","https://openalex.org/W842810586","https://openalex.org/W2916255597","https://openalex.org/W4319940250","https://openalex.org/W3095980030","https://openalex.org/W2352298027","https://openalex.org/W2092919065","https://openalex.org/W3138801416","https://openalex.org/W3198510869","https://openalex.org/W4379379356"],"abstract_inverted_index":{"Digital":[0],"forensics":[1],"analysis":[2],"is":[3,36,102],"a":[4,37,82,87],"slow":[5],"process":[6],"mainly":[7],"due":[8],"to":[9,71,85,104,118],"the":[10,64,128,150],"large":[11],"amount":[12,129],"and":[13,25,52,60,108,114,122,136,164],"variety":[14],"of":[15,28,57,95,130],"data.":[16],"Some":[17],"forensic":[18,88],"tools":[19],"help":[20],"categorize":[21],"files":[22],"by":[23],"type":[24],"allow":[26],"automatization":[27],"tasks,":[29],"like":[30],"named":[31],"entity":[32],"recognition":[33],"(NER).":[34],"NER":[35,107],"key":[38],"component":[39],"in":[40,63,76,111,145],"many":[41],"natural":[42],"language":[43,113],"processing":[44],"(NLP)":[45],"applications,":[46,157],"such":[47],"as":[48],"relation":[49],"extraction":[50,92,163],"(RE)":[51],"information":[53,91,162],"retrieval.":[54],"The":[55],"introduction":[56],"neural":[58],"networks":[59],"transformer":[61],"architectures":[62],"last":[65],"few":[66],"years":[67],"made":[68],"it":[69,101],"possible":[70,103],"develop":[72,105],"more":[73],"accurate":[74],"models":[75,110],"different":[77],"languages.":[78],"This":[79],"work":[80],"proposes":[81],"reproducible":[83],"setup":[84],"build":[86,123],"pipeline":[89],"for":[90,133,154,160],"using":[93],"NLP":[94],"texts.":[96],"Our":[97],"results":[98],"show":[99],"that":[100,141],"both":[106],"RE":[109],"any":[112],"tune":[115],"its":[116],"hyper-parameters":[117],"achieve":[119],"state-of-art":[120],"performance":[121],"comprehensive":[124],"knowledge":[125],"graphs,":[126],"decreasing":[127],"time":[131],"required":[132],"human":[134],"supervision":[135],"review.":[137],"We":[138],"also":[139,159],"find":[140],"solving":[142],"this":[143],"task":[144],"phases":[146],"can":[147],"further":[148],"improve":[149],"performance,":[151],"not":[152],"only":[153],"digital":[155],"investigation":[156],"but":[158],"general-purpose":[161],"analysis.":[165]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
