{"id":"https://openalex.org/W4406459416","doi":"https://doi.org/10.1109/bigdata62323.2024.10825848","title":"Empirical Evaluation of Ensemble and Single Model Approaches for Multilingual Predictive Coding","display_name":"Empirical Evaluation of Ensemble and Single Model Approaches for Multilingual Predictive Coding","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406459416","doi":"https://doi.org/10.1109/bigdata62323.2024.10825848"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825848","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825848","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053350610","display_name":"Jingchao Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I68939703","display_name":"Fujikura (United States)","ror":"https://ror.org/00qpbjw91","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098230","https://openalex.org/I68939703"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jingchao Yang","raw_affiliation_strings":["LLC,Data &#x0026; Technology Ankura Consulting Group,Washington, D.C.,USA"],"affiliations":[{"raw_affiliation_string":"LLC,Data &#x0026; Technology Ankura Consulting Group,Washington, D.C.,USA","institution_ids":["https://openalex.org/I68939703"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032454032","display_name":"Adam D\u0105browski","orcid":"https://orcid.org/0000-0003-4258-2435"},"institutions":[{"id":"https://openalex.org/I68939703","display_name":"Fujikura (United States)","ror":"https://ror.org/00qpbjw91","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098230","https://openalex.org/I68939703"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adam Dabrowski","raw_affiliation_strings":["LLC,Data &#x0026; Technology Ankura Consulting Group,Washington, D.C.,USA"],"affiliations":[{"raw_affiliation_string":"LLC,Data &#x0026; Technology Ankura Consulting Group,Washington, D.C.,USA","institution_ids":["https://openalex.org/I68939703"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042371258","display_name":"Robert Neary","orcid":null},"institutions":[{"id":"https://openalex.org/I68939703","display_name":"Fujikura (United States)","ror":"https://ror.org/00qpbjw91","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098230","https://openalex.org/I68939703"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Robert Neary","raw_affiliation_strings":["LLC,Data &#x0026; Technology Ankura Consulting Group,Washington, D.C.,USA"],"affiliations":[{"raw_affiliation_string":"LLC,Data &#x0026; Technology Ankura Consulting Group,Washington, D.C.,USA","institution_ids":["https://openalex.org/I68939703"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049186333","display_name":"Nathaniel Huber-Fliflet","orcid":null},"institutions":[{"id":"https://openalex.org/I68939703","display_name":"Fujikura (United States)","ror":"https://ror.org/00qpbjw91","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098230","https://openalex.org/I68939703"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nathaniel Huber-Fliflet","raw_affiliation_strings":["LLC,Data &#x0026; Technology Ankura Consulting Group,Washington, D.C.,USA"],"affiliations":[{"raw_affiliation_string":"LLC,Data &#x0026; Technology Ankura Consulting Group,Washington, D.C.,USA","institution_ids":["https://openalex.org/I68939703"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052463733","display_name":"Fusheng Wei","orcid":"https://orcid.org/0009-0000-5881-5307"},"institutions":[{"id":"https://openalex.org/I68939703","display_name":"Fujikura (United States)","ror":"https://ror.org/00qpbjw91","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098230","https://openalex.org/I68939703"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fusheng Wei","raw_affiliation_strings":["LLC,Data &#x0026; Technology Ankura Consulting Group,Washington, D.C.,USA"],"affiliations":[{"raw_affiliation_string":"LLC,Data &#x0026; Technology Ankura Consulting Group,Washington, D.C.,USA","institution_ids":["https://openalex.org/I68939703"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101453263","display_name":"Qiang Mao","orcid":"https://orcid.org/0000-0002-9382-6527"},"institutions":[{"id":"https://openalex.org/I68939703","display_name":"Fujikura (United States)","ror":"https://ror.org/00qpbjw91","country_code":"US","type":"company","lineage":["https://openalex.org/I4210098230","https://openalex.org/I68939703"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qiang Mao","raw_affiliation_strings":["LLC,Data &#x0026; Technology Ankura Consulting Group,Washington, D.C.,USA"],"affiliations":[{"raw_affiliation_string":"LLC,Data &#x0026; Technology Ankura Consulting Group,Washington, D.C.,USA","institution_ids":["https://openalex.org/I68939703"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5053350610"],"corresponding_institution_ids":["https://openalex.org/I68939703"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23724133,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4768","last_page":"4772"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/predictive-coding","display_name":"Predictive coding","score":0.727270245552063},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7211849093437195},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.5683140158653259},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4587881565093994},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.4366079568862915},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4111538231372833},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3315102159976959},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.1468554437160492},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1048918068408966}],"concepts":[{"id":"https://openalex.org/C2778061373","wikidata":"https://www.wikidata.org/wiki/Q1315146","display_name":"Predictive coding","level":3,"score":0.727270245552063},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7211849093437195},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5683140158653259},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4587881565093994},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.4366079568862915},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4111538231372833},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3315102159976959},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1468554437160492},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1048918068408966}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825848","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825848","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.44999998807907104,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2413512986","https://openalex.org/W2585689263","https://openalex.org/W2784310254","https://openalex.org/W3013534122","https://openalex.org/W3173327529","https://openalex.org/W4389519438","https://openalex.org/W4391113954","https://openalex.org/W6715324707","https://openalex.org/W6763932988"],"related_works":["https://openalex.org/W2955990109","https://openalex.org/W2017266518","https://openalex.org/W2584189775","https://openalex.org/W2784543861","https://openalex.org/W284530269","https://openalex.org/W2988666471","https://openalex.org/W2961085424","https://openalex.org/W2606267813","https://openalex.org/W2114069527","https://openalex.org/W2369033652"],"abstract_inverted_index":{"Corporate":[0],"litigation":[1],"costs":[2],"associated":[3,84],"with":[4,85],"manually":[5],"reviewing":[6],"documents":[7,57,237],"in":[8,40,77,192,238,301],"legal":[9,28,42,65,86,97,207,268,278],"matters":[10,98],"continue":[11],"to":[12,33,39,54,142,164,262],"rise,":[13],"driven":[14],"by":[15,22],"the":[16,41,61,69,80,107,118,122,132,137,187,228,244,264,284,296],"ever-expanding":[17],"volumes":[18],"of":[19,64,109,128,148,151,179,258,298],"data":[20],"generated":[21],"businesses.":[23],"To":[24],"address":[25],"this":[26,259],"challenge,":[27],"teams":[29],"have":[30],"increasingly":[31,99],"turned":[32],"text":[34,215],"classification":[35,70],"techniques,":[36],"commonly":[37],"referred":[38],"community":[43],"as":[44],"predictive":[45,47,72,110,113,162,299],"modeling,":[46],"coding,":[48],"or":[49],"\"Technology":[50],"Assisted":[51],"Review":[52],"(TAR)\",":[53],"identify":[55],"relevant":[56],"more":[58,89],"efficiently":[59],"during":[60],"discovery":[62,272],"phase":[63],"matters.":[66],"By":[67],"streamlining":[68],"process,":[71],"coding":[73,163,300],"has":[74],"proven":[75],"instrumental":[76],"minimizing":[78],"both":[79],"time":[81],"and":[82,146,172,212,221,242,287],"cost":[83],"discovery.":[87],"As":[88],"corporations":[90],"expand":[91],"their":[92,239],"operations":[93],"across":[94,248],"global":[95],"regions,":[96],"involve":[100],"multilingual":[101,165,169,200,271,292],"datasets,":[102],"introducing":[103],"significant":[104],"complexities":[105],"for":[106,160,235,267,291],"application":[108],"coding.":[111],"Traditional":[112],"models":[114,184,234],"exhibit":[115],"bias":[116],"towards":[117],"dominant":[119],"language":[120,153,246],"within":[121,131],"model\u2019s":[123],"training":[124],"data.":[125],"The":[126,196,256,274],"prevalence":[127],"multiple":[129],"languages":[130,241],"same":[133],"document":[134,194,214,293],"further":[135],"complicates":[136],"challenge.":[138],"This":[139,177],"can":[140],"lead":[141],"inefficiencies,":[143],"elevated":[144],"costs,":[145],"risks":[147],"inaccurate":[149],"classifications":[150],"nondominant":[152],"documents.This":[154],"research":[155,197,260],"empirically":[156],"examines":[157],"two":[158],"methods":[159],"applying":[161],"datasets:":[166],"a":[167,173,199,203,252],"single":[168,188,253],"model":[170,190,247],"approach":[171,191,230,290],"language-specific":[174,183,233],"modeling":[175,289],"approach.":[176],"group":[178],"collaborators":[180],"posits":[181],"that":[182,227],"will":[185,276],"outperform":[186],"multilanguage":[189,254],"evaluating":[193],"relevance.":[195],"used":[198],"dataset":[201],"from":[202],"recent":[204],"confidential":[205],"real-world":[206],"matter":[208],"containing":[209],"predominantly":[210],"English":[211],"Chinese":[213],"Performance":[216],"was":[217],"assessed":[218],"using":[219],"precision":[220],"recall":[222],"measures.":[223],"Our":[224],"results":[225],"show":[226],"ensemble":[229],"\u2013":[231],"employing":[232,243],"classifying":[236],"respective":[240],"mixed":[245],"all":[249],"remaining":[250],"documents\u2014outperforms":[251],"model.":[255],"outcomes":[257],"aim":[261],"inform":[263],"workflow":[265],"considerations":[266],"professionals":[269],"undertaking":[270],"efforts.":[273],"findings":[275],"help":[277],"practitioners":[279],"make":[280],"data-driven":[281],"decisions":[282],"on":[283],"most":[285],"efficient":[286],"accurate":[288],"review,":[294],"shaping":[295],"future":[297],"cross-linguistic":[302],"e-discovery.":[303]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
