{"id":"https://openalex.org/W4413757117","doi":"https://doi.org/10.1145/3704268.3742697","title":"Document Classification using File Names","display_name":"Document Classification using File Names","publication_year":2025,"publication_date":"2025-08-27","ids":{"openalex":"https://openalex.org/W4413757117","doi":"https://doi.org/10.1145/3704268.3742697"},"language":"en","primary_location":{"id":"doi:10.1145/3704268.3742697","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3704268.3742697","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3704268.3742697","source":null,"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Document Engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3704268.3742697","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006253547","display_name":"Zhijian Li","orcid":null},"institutions":[{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhijian Li","raw_affiliation_strings":["Vanderbilt University, Nashville, Tennessee, USA"],"affiliations":[{"raw_affiliation_string":"Vanderbilt University, Nashville, Tennessee, USA","institution_ids":["https://openalex.org/I200719446"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023740689","display_name":"Stefan Larson","orcid":null},"institutions":[{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stefan Larson","raw_affiliation_strings":["Vanderbilt University, Nashville, Tennessee, USA"],"affiliations":[{"raw_affiliation_string":"Vanderbilt University, Nashville, Tennessee, USA","institution_ids":["https://openalex.org/I200719446"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030030910","display_name":"Kevin Leach","orcid":"https://orcid.org/0000-0002-4001-3442"},"institutions":[{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kevin Leach","raw_affiliation_strings":["Vanderbilt University, Nashville, Tennessee, USA"],"affiliations":[{"raw_affiliation_string":"Vanderbilt University, Nashville, Tennessee, USA","institution_ids":["https://openalex.org/I200719446"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5006253547"],"corresponding_institution_ids":["https://openalex.org/I200719446"],"apc_list":null,"apc_paid":null,"fwci":6.333,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.9656964,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.984000027179718,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7743319869041443},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.474865198135376},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.37360885739326477},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3659796714782715}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7743319869041443},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.474865198135376},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.37360885739326477},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3659796714782715}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3704268.3742697","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3704268.3742697","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3704268.3742697","source":null,"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Document Engineering","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3704268.3742697","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3704268.3742697","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3704268.3742697","source":null,"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Document Engineering","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4413757117.pdf","grobid_xml":"https://content.openalex.org/works/W4413757117.grobid-xml"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W1516184288","https://openalex.org/W2031408949","https://openalex.org/W2107391405","https://openalex.org/W2295598076","https://openalex.org/W2962788341","https://openalex.org/W2986193249","https://openalex.org/W2997154779","https://openalex.org/W2997591727","https://openalex.org/W3007809473","https://openalex.org/W3104953317","https://openalex.org/W3176851559","https://openalex.org/W3195993511","https://openalex.org/W4226443146","https://openalex.org/W4245267204","https://openalex.org/W4285261371","https://openalex.org/W4304013646","https://openalex.org/W4304014014","https://openalex.org/W4312233877"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Rapid":[0],"document":[1,132,139,149],"classification":[2,127],"is":[3],"critical":[4,142],"in":[5,86,141],"several":[6],"time-sensitive":[7],"applications":[8],"like":[9],"digital":[10],"forensics":[11],"and":[12,36,66,105,146],"large-scale":[13],"media":[14],"classification.":[15,150],"Traditional":[16],"approaches":[17],"that":[18,90,122],"rely":[19],"on":[20,72,82],"heavy-duty":[21],"deep":[22],"learning":[23,54],"models":[24,115],"fall":[25],"short":[26],"due":[27],"to":[28,64,131],"high":[29],"inference":[30,79],"times":[31],"over":[32],"vast":[33,138],"input":[34],"datasets":[35,84,140],"computational":[37],"resources":[38],"associated":[39],"with":[40,57,103],"analyzing":[41],"whole":[42],"documents.":[43],"In":[44],"this":[45,87],"paper,":[46],"we":[47],"present":[48],"a":[49,58,129],"method":[50],"using":[51],"lightweight":[52,124],"supervised":[53],"models,":[55],"combined":[56],"TF-IDF":[59],"feature":[60],"extraction-based":[61],"tokenization":[62],"method,":[63],"accurately":[65],"efficiently":[67,136],"classify":[68],"documents":[69,102],"based":[70],"solely":[71],"their":[73],"file":[74,92,125],"names,":[75],"which":[76],"substantially":[77],"reduces":[78],"time.":[80],"Experiments":[81],"two":[83],"introduced":[85],"paper":[88],"show":[89],"our":[91],"name":[93,126],"classifiers":[94],"correctly":[95],"predict":[96],"more":[97,113,147],"than":[98,112],"90%":[99],"of":[100],"in-scope":[101],"99.63%":[104],"96.57%":[106],"accuracy":[107],"while":[108],"being":[109],"442x":[110],"faster":[111],"complex":[114],"such":[116],"as":[117,128],"DiT.":[118],"Our":[119],"results":[120],"demonstrate":[121],"incorporating":[123],"front-end":[130],"analysis":[133],"pipelines":[134],"can":[135],"process":[137],"scenarios,":[143],"enabling":[144],"fast":[145],"reliable":[148]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
