{"id":"https://openalex.org/W4320024216","doi":"https://doi.org/10.1109/bigdata55660.2022.10020327","title":"A SHAP-based Active Learning Approach for Creating High-Quality Training Data","display_name":"A SHAP-based Active Learning Approach for Creating High-Quality Training Data","publication_year":2022,"publication_date":"2022-12-17","ids":{"openalex":"https://openalex.org/W4320024216","doi":"https://doi.org/10.1109/bigdata55660.2022.10020327"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata55660.2022.10020327","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata55660.2022.10020327","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073571904","display_name":"Nailcan Kara","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nailcan Kara","raw_affiliation_strings":["Analytics Center of Excellence Fibabanka,&#x0130;zmir,Turkey"],"affiliations":[{"raw_affiliation_string":"Analytics Center of Excellence Fibabanka,&#x0130;zmir,Turkey","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080753002","display_name":"Yagiz Levent Gume","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yagiz Levent Gume","raw_affiliation_strings":["R&#x0026;D Center Fibabanka,&#x0130;stanbul,Turkey"],"affiliations":[{"raw_affiliation_string":"R&#x0026;D Center Fibabanka,&#x0130;stanbul,Turkey","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021730288","display_name":"Umit Tigrak","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Umit Tigrak","raw_affiliation_strings":["Analytics Center of Excellence Fibabanka,&#x0130;stanbul,Turkey"],"affiliations":[{"raw_affiliation_string":"Analytics Center of Excellence Fibabanka,&#x0130;stanbul,Turkey","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064543875","display_name":"Gokce Ezeroglu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gokce Ezeroglu","raw_affiliation_strings":["Analytics Center of Excellence Fibabanka,&#x0130;zmir,Turkey"],"affiliations":[{"raw_affiliation_string":"Analytics Center of Excellence Fibabanka,&#x0130;zmir,Turkey","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011739033","display_name":"Serdar Mola","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Serdar Mola","raw_affiliation_strings":["Analytics Center of Excellence Fibabanka,&#x0130;zmir,Turkey"],"affiliations":[{"raw_affiliation_string":"Analytics Center of Excellence Fibabanka,&#x0130;zmir,Turkey","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005930349","display_name":"Omer Burak Akgun","orcid":"https://orcid.org/0009-0006-1050-2049"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Omer Burak Akgun","raw_affiliation_strings":["Analytics Center of Excellence Fibabanka,&#x0130;zmir,Turkey"],"affiliations":[{"raw_affiliation_string":"Analytics Center of Excellence Fibabanka,&#x0130;zmir,Turkey","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030252794","display_name":"Arzucan \u00d6zg\u00fcr","orcid":"https://orcid.org/0000-0001-8376-1056"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arzucan Ozgur","raw_affiliation_strings":["Bo&#x011F;azi&#x00E7;i University,Department of Computer Engineering,&#x0130;stanbul,Turkey"],"affiliations":[{"raw_affiliation_string":"Bo&#x011F;azi&#x00E7;i University,Department of Computer Engineering,&#x0130;stanbul,Turkey","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5073571904"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.315,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.54870681,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"4002","last_page":"4008"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8075094223022461},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.6296053528785706},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.6181289553642273},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6096857786178589},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6075571775436401},{"id":"https://openalex.org/keywords/outsourcing","display_name":"Outsourcing","score":0.5936601161956787},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5926677584648132},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5376274585723877},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5359504818916321},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.5182540416717529},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5163015723228455},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.46592187881469727},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.4532129168510437},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4365340769290924},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4362732470035553},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.42731666564941406},{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.4104001820087433},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.410380095243454}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8075094223022461},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.6296053528785706},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.6181289553642273},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6096857786178589},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6075571775436401},{"id":"https://openalex.org/C46934059","wikidata":"https://www.wikidata.org/wiki/Q61515","display_name":"Outsourcing","level":2,"score":0.5936601161956787},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5926677584648132},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5376274585723877},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5359504818916321},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.5182540416717529},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5163015723228455},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.46592187881469727},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4532129168510437},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4365340769290924},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4362732470035553},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.42731666564941406},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.4104001820087433},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.410380095243454},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata55660.2022.10020327","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata55660.2022.10020327","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1514707997","https://openalex.org/W1777939696","https://openalex.org/W1993108076","https://openalex.org/W2080021732","https://openalex.org/W2144452292","https://openalex.org/W2600463316","https://openalex.org/W2607303097","https://openalex.org/W2796084947","https://openalex.org/W2937423263","https://openalex.org/W2963742748","https://openalex.org/W2970769659","https://openalex.org/W3105625590","https://openalex.org/W3156333129","https://openalex.org/W3183150041","https://openalex.org/W3184133963","https://openalex.org/W3192495122","https://openalex.org/W3205626500","https://openalex.org/W3209051700","https://openalex.org/W4242069725","https://openalex.org/W4287198657","https://openalex.org/W4300895248","https://openalex.org/W6676483475","https://openalex.org/W6676947859","https://openalex.org/W6683869333","https://openalex.org/W6685320204","https://openalex.org/W6755207826","https://openalex.org/W6791720600","https://openalex.org/W6794154069","https://openalex.org/W6794404659","https://openalex.org/W6803012540","https://openalex.org/W7023974811"],"related_works":["https://openalex.org/W2737596577","https://openalex.org/W2949671220","https://openalex.org/W2130553454","https://openalex.org/W4317548404","https://openalex.org/W3022007134","https://openalex.org/W2087783760","https://openalex.org/W1492505081","https://openalex.org/W2033364610","https://openalex.org/W2797776314","https://openalex.org/W3163689946"],"abstract_inverted_index":{"Machine":[0],"learning-based":[1,46],"text":[2],"classification":[3],"models":[4],"require":[5],"labeled":[6,94],"data":[7,31,59],"for":[8,84,182],"training.":[9],"However,":[10],"manual":[11],"labeling":[12,32],"is":[13,21,33,130,178],"a":[14,43,139,179],"costly":[15],"and":[16,79],"time-consuming":[17],"process.":[18],"This":[19,170],"task":[20],"particularly":[22],"difficult":[23,52],"in":[24,48,54,73,109,113,157],"domains":[25],"such":[26],"as":[27,166],"banking,":[28],"where":[29],"outsourcing":[30],"generally":[34],"not":[35],"allowed":[36],"due":[37],"to":[38,76,81,98,132,138],"privacy":[39],"laws.":[40],"We":[41,102],"propose":[42],"novel":[44],"active":[45,176],"approach":[47,108,129],"which":[49],"the":[50,55,64,71,74,99,104,114,127,134,144,155,160],"most":[51],"instances":[53,95],"pool":[56],"of":[57,70,89,106,142],"unlabeled":[58],"are":[60,96,124],"selected":[61,149],"based":[62,175],"on":[63],"Shapley":[65],"Additive":[66],"Explanations":[67],"(SHAP)":[68],"values":[69],"words":[72],"texts":[75],"be":[77],"classified":[78],"passed":[80],"human":[82,190],"annotators":[83],"labeling.":[85],"At":[86],"each":[87],"iteration":[88],"this":[90,107],"human-in-the-loop":[91,174],"strategy,":[92],"newly":[93],"added":[97],"training":[100,135,145,185],"set.":[101],"demonstrate":[103],"effectiveness":[105],"classifying":[110],"customer":[111],"comments":[112],"banking":[115],"domain":[116],"surveys.":[117],"Our":[118],"experiments":[119],"indicate":[120],"that":[121,154,173],"better":[122],"results":[123],"achieved":[125],"when":[126],"proposed":[128],"used":[131],"expand":[133],"set,":[136],"compared":[137],"baseline":[140],"strategy":[141,181],"expanding":[143],"set":[146],"with":[147],"randomly":[148],"instances.":[150],"Further":[151],"analysis":[152],"shows":[153],"difference":[156],"performance":[158],"between":[159],"two":[161],"approaches":[162],"becomes":[163],"more":[164],"pronounced":[165],"class":[167],"imbalance":[168],"increases.":[169],"study":[171],"suggests":[172],"learning":[177],"powerful":[180],"creating":[183],"high-quality":[184],"datasets":[186],"by":[187],"effectively":[188],"leveraging":[189],"annotation":[191],"effort.":[192]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
