{"id":"https://openalex.org/W2913176359","doi":"https://doi.org/10.1109/bigdata.2018.8622459","title":"Hybridization of Active Learning and Data Programming for Labeling Large Industrial Datasets","display_name":"Hybridization of Active Learning and Data Programming for Labeling Large Industrial Datasets","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2913176359","doi":"https://doi.org/10.1109/bigdata.2018.8622459","mag":"2913176359"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2018.8622459","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622459","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032311062","display_name":"Mona Nashaat","orcid":"https://orcid.org/0000-0002-7580-5757"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Mona Nashaat","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Alberta, Edmonton, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Alberta, Edmonton, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022645420","display_name":"Aindrila Ghosh","orcid":"https://orcid.org/0000-0003-4908-9491"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Aindrila Ghosh","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Alberta, Edmonton, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Alberta, Edmonton, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030495591","display_name":"James Miller","orcid":"https://orcid.org/0000-0001-5095-3000"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"James Miller","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Alberta, Edmonton, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Alberta, Edmonton, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031825849","display_name":"Shaikh Quader","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113654","display_name":"IBM (Canada)","ror":"https://ror.org/025sxka56","country_code":"CA","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210113654"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Shaikh Quader","raw_affiliation_strings":["IBM Canada, Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"IBM Canada, Toronto, Canada","institution_ids":["https://openalex.org/I4210113654"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065864217","display_name":"Chad Marston","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chad Marston","raw_affiliation_strings":["IBM USA, USA"],"affiliations":[{"raw_affiliation_string":"IBM USA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114004733","display_name":"Jean\u2010Fran\u00e7ois Puget","orcid":null},"institutions":[{"id":"https://openalex.org/I4210112067","display_name":"IBM (France)","ror":"https://ror.org/02wnbr922","country_code":"FR","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210112067"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Jean-Francois Puget","raw_affiliation_strings":["IBM France, France"],"affiliations":[{"raw_affiliation_string":"IBM France, France","institution_ids":["https://openalex.org/I4210112067"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5032311062"],"corresponding_institution_ids":["https://openalex.org/I154425047"],"apc_list":null,"apc_paid":null,"fwci":1.6923,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.88580158,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"46","last_page":"55"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8382638692855835},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7265069484710693},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6046724915504456},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.5691201090812683},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5541234016418457},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5538293123245239},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.5170679688453674},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4737214148044586},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.46587109565734863},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.14692580699920654}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8382638692855835},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7265069484710693},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6046724915504456},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.5691201090812683},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5541234016418457},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5538293123245239},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.5170679688453674},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4737214148044586},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.46587109565734863},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.14692580699920654},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2018.8622459","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622459","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":70,"referenced_works":["https://openalex.org/W46659105","https://openalex.org/W1484084878","https://openalex.org/W1530042113","https://openalex.org/W1788633279","https://openalex.org/W1940008012","https://openalex.org/W2004672916","https://openalex.org/W2021732807","https://openalex.org/W2022096757","https://openalex.org/W2037866349","https://openalex.org/W2067566391","https://openalex.org/W2067760738","https://openalex.org/W2080021732","https://openalex.org/W2085988980","https://openalex.org/W2085989833","https://openalex.org/W2099550922","https://openalex.org/W2101498552","https://openalex.org/W2107598941","https://openalex.org/W2113741880","https://openalex.org/W2128302979","https://openalex.org/W2130486630","https://openalex.org/W2143010037","https://openalex.org/W2182886880","https://openalex.org/W2244581512","https://openalex.org/W2292361954","https://openalex.org/W2295598076","https://openalex.org/W2330449247","https://openalex.org/W2343514605","https://openalex.org/W2404161646","https://openalex.org/W2406349003","https://openalex.org/W2408920689","https://openalex.org/W2441269247","https://openalex.org/W2471138382","https://openalex.org/W2533861448","https://openalex.org/W2575032143","https://openalex.org/W2577784528","https://openalex.org/W2579642215","https://openalex.org/W2584099312","https://openalex.org/W2594164625","https://openalex.org/W2600061660","https://openalex.org/W2604259521","https://openalex.org/W2614062172","https://openalex.org/W2668114903","https://openalex.org/W2753688405","https://openalex.org/W2760375580","https://openalex.org/W2765482949","https://openalex.org/W2769041395","https://openalex.org/W2771169143","https://openalex.org/W2782896998","https://openalex.org/W2783702157","https://openalex.org/W2798587560","https://openalex.org/W2903158431","https://openalex.org/W2951911250","https://openalex.org/W2953132584","https://openalex.org/W2959716049","https://openalex.org/W2963696295","https://openalex.org/W2963772355","https://openalex.org/W3102476541","https://openalex.org/W6602002561","https://openalex.org/W6628826841","https://openalex.org/W6631580585","https://openalex.org/W6640485552","https://openalex.org/W6656015373","https://openalex.org/W6713680328","https://openalex.org/W6732730354","https://openalex.org/W6734172854","https://openalex.org/W6735856480","https://openalex.org/W6743971617","https://openalex.org/W6745144795","https://openalex.org/W6750276444","https://openalex.org/W6756615331"],"related_works":["https://openalex.org/W2361861616","https://openalex.org/W2263699433","https://openalex.org/W2377979023","https://openalex.org/W2218034408","https://openalex.org/W2392921965","https://openalex.org/W2358755282","https://openalex.org/W2625833328","https://openalex.org/W4206195464","https://openalex.org/W2949671220","https://openalex.org/W4390832402"],"abstract_inverted_index":{"Modern":[0],"machine":[1],"learning":[2,55,197],"(ML)":[3],"models":[4,24],"are":[5,61,94],"being":[6],"used":[7,65,179],"heavily":[8],"in":[9,105,237],"business":[10,187,239],"domains":[11],"to":[12,49,66,83,99,140,150,154,204],"build":[13],"effective":[14],"decision":[15],"support":[16],"systems.":[17],"As":[18],"a":[19,31,110,168,185,243],"primary":[20],"requirement,":[21],"supervised":[22],"ML":[23],"need":[25],"large":[26,86],"labeled":[27,35],"datasets.":[28,68],"However,":[29],"obtaining":[30],"high":[32],"volume":[33],"of":[34,118,126,163,189,211,246],"training":[36,144],"data":[37,145,199,228],"is":[38,80],"both":[39,70],"expensive":[40,82],"and":[41,57,75,89,97,124,146,184,198,208],"time-consuming.":[42],"Researchers":[43],"have":[44,72,178],"proposed":[45,129,175,213,220],"several":[46],"labeling":[47,52,135,225,235],"approaches":[48,71],"avoid":[50],"manual":[51],"efforts.":[53],"Active":[54],"(AL)":[56],"Data":[58],"Programming":[59],"(DP)":[60],"two":[62],"state-of-the-art":[63],"techniques":[64,201],"label":[67,155],"Nevertheless,":[69],"their":[73],"strengths":[74],"weaknesses.":[76],"For":[77],"example,":[78],"AL":[79,149],"computationally":[81],"apply":[84],"on":[85],"industrial":[87],"datasets;":[88],"labels":[90,166],"generated":[91],"by":[92,137],"DP":[93,119,139],"often":[95],"inaccurate":[96],"difficult":[98],"interpret.":[100],"To":[101,172],"address":[102],"these":[103],"challenges,":[104],"this":[106],"paper,":[107],"we":[108,177],"propose":[109],"novel":[111],"hybrid":[112],"method":[113,221],"that":[114,159,218],"integrates":[115],"the":[116,121,134,152,161,164,174,206,219,234],"scalability":[117],"with":[120,167,249],"user":[122,153],"engagement":[123],"accuracy":[125,162,226],"AL.":[127],"The":[128,215],"approach":[130],"aims":[131],"at":[132],"optimizing":[133],"process":[136],"applying":[138],"generate":[141],"initial":[142],"noisy":[143],"then":[147],"use":[148,194],"query":[151],"only":[156],"those":[157],"points":[158],"maximize":[160],"final":[165],"minimum":[169],"annotation":[170,209],"cost.":[171],"evaluate":[173],"approach,":[176],"five":[180],"open":[181],"source":[182],"datasets":[183],"real-world":[186,238],"dataset":[188],"1.5":[190],"million":[191],"records.":[192],"We":[193],"traditional":[195],"active":[196,250],"programming":[200],"as":[202],"baselines":[203],"compare":[205],"performance":[207,247],"cost":[210,236],"our":[212],"approach.":[214],"results":[216],"show":[217],"can":[222,232],"achieve":[223],"higher":[224],"than":[227],"programming.":[229],"It":[230],"also":[231],"minimize":[233],"scenarios,":[240],"while":[241],"delivering":[242],"comparable":[244],"level":[245],"(accuracy)":[248],"learning.":[251]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
