{"id":"https://openalex.org/W4409310793","doi":"https://doi.org/10.1109/tnse.2025.3559342","title":"Cost-Optimized Crowdsourcing for NLP via Worker Selection and Data Augmentation","display_name":"Cost-Optimized Crowdsourcing for NLP via Worker Selection and Data Augmentation","publication_year":2025,"publication_date":"2025-04-09","ids":{"openalex":"https://openalex.org/W4409310793","doi":"https://doi.org/10.1109/tnse.2025.3559342"},"language":"en","primary_location":{"id":"doi:10.1109/tnse.2025.3559342","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnse.2025.3559342","pdf_url":null,"source":{"id":"https://openalex.org/S2484352698","display_name":"IEEE Transactions on Network Science and Engineering","issn_l":"2327-4697","issn":["2327-4697","2334-329X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Network Science and Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090083168","display_name":"Liner Yang","orcid":"https://orcid.org/0000-0001-8364-7165"},"institutions":[{"id":"https://openalex.org/I115212828","display_name":"Beijing Language and Culture University","ror":"https://ror.org/03te2zs36","country_code":"CN","type":"education","lineage":["https://openalex.org/I115212828"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liner Yang","raw_affiliation_strings":["School of Information Science, Beijing Language and Culture University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science, Beijing Language and Culture University, Beijing, China","institution_ids":["https://openalex.org/I115212828"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100373830","display_name":"Yujie Wang","orcid":"https://orcid.org/0000-0002-5424-1788"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yujie Wang","raw_affiliation_strings":["School of Computer Science and Technology, Beijing Jiaotong University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010064740","display_name":"Zhixuan Fang","orcid":"https://orcid.org/0000-0001-7979-4269"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhixuan Fang","raw_affiliation_strings":["Institute for Interdisciplinary Information Sciences (IIIS), Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute for Interdisciplinary Information Sciences (IIIS), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111979418","display_name":"Yaping Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaping Huang","raw_affiliation_strings":["School of Computer Science and Technology, Beijing Jiaotong University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046021757","display_name":"Erhong Yang","orcid":"https://orcid.org/0009-0009-9859-5391"},"institutions":[{"id":"https://openalex.org/I115212828","display_name":"Beijing Language and Culture University","ror":"https://ror.org/03te2zs36","country_code":"CN","type":"education","lineage":["https://openalex.org/I115212828"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Erhong Yang","raw_affiliation_strings":["School of Information Science, Beijing Language and Culture University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science, Beijing Language and Culture University, Beijing, China","institution_ids":["https://openalex.org/I115212828"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5090083168"],"corresponding_institution_ids":["https://openalex.org/I115212828"],"apc_list":null,"apc_paid":null,"fwci":1.6964,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.85688866,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"12","issue":"4","first_page":"3343","last_page":"3359"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11675","display_name":"Open Source Software Innovations","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.9370031356811523},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6361871361732483},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6080009937286377},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6069501638412476},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4699910879135132},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4371705949306488},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.111407071352005}],"concepts":[{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.9370031356811523},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6361871361732483},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6080009937286377},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6069501638412476},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4699910879135132},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4371705949306488},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.111407071352005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tnse.2025.3559342","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnse.2025.3559342","pdf_url":null,"source":{"id":"https://openalex.org/S2484352698","display_name":"IEEE Transactions on Network Science and Engineering","issn_l":"2327-4697","issn":["2327-4697","2334-329X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Network Science and Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.6200000047683716}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1984243510","https://openalex.org/W2003497265","https://openalex.org/W2009551863","https://openalex.org/W2112511942","https://openalex.org/W2141649520","https://openalex.org/W2150638405","https://openalex.org/W2473536663","https://openalex.org/W2579405208","https://openalex.org/W2626052287","https://openalex.org/W2952087486","https://openalex.org/W2964197033","https://openalex.org/W3156621611","https://openalex.org/W3182346119","https://openalex.org/W3187134297","https://openalex.org/W4200585851","https://openalex.org/W4214717370","https://openalex.org/W4231916799","https://openalex.org/W4234228486","https://openalex.org/W4293409613","https://openalex.org/W4296211693","https://openalex.org/W4304190363","https://openalex.org/W4366752877","https://openalex.org/W4367599042","https://openalex.org/W4404822966","https://openalex.org/W4405974386","https://openalex.org/W6601392155","https://openalex.org/W6636392717","https://openalex.org/W6676077707","https://openalex.org/W6677593936","https://openalex.org/W6681875376","https://openalex.org/W6683738474","https://openalex.org/W6732226717","https://openalex.org/W6742043453","https://openalex.org/W6752326999","https://openalex.org/W6767559643","https://openalex.org/W6784982321","https://openalex.org/W6796650594","https://openalex.org/W6810476313","https://openalex.org/W6810481934","https://openalex.org/W6811053806"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"worker":[3,77,164],"selection":[4,78,165],"and":[5,14,56,69,99,114,134,152],"data":[6,103],"augmentation":[7,104],"algorithms":[8],"aimed":[9],"at":[10],"improving":[11],"annotation":[12],"quality":[13],"reducing":[15],"costs":[16],"in":[17,87],"crowdsourcing":[18],"for":[19,46],"Natural":[20],"Language":[21],"Processing":[22],"(NLP).":[23],"Unlike":[24],"previous":[25],"studies":[26],"targeting":[27],"simpler":[28],"tasks":[29],"like":[30],"binary":[31],"classification,":[32],"which":[33],"require":[34],"less":[35],"contextual":[36],"understanding,":[37],"this":[38],"study":[39],"aims":[40],"to":[41,123],"provide":[42,158],"a":[43,47,63,70,159],"unified":[44],"paradigm":[45],"wider":[48],"spectrum":[49],"of":[50,84,125,143],"NLP":[51,88],"tasks,":[52],"with":[53],"sequence":[54],"labeling":[55],"text":[57],"generation":[58],"as":[59],"application":[60],"showcases.":[61],"Utilizing":[62],"Combinatorial":[64],"Multi-Armed":[65],"Bandit":[66],"(CMAB)":[67],"approach":[68],"cost-effective":[71],"human":[72],"feedback":[73],"mechanism,":[74],"the":[75,82,94,108,118,126,144],"proposed":[76],"algorithm":[79,92],"effectively":[80],"addresses":[81],"challenge":[83],"label":[85],"inter-dependency":[86],"tasks.":[89],"Additionally,":[90],"our":[91,163],"tackles":[93],"issues":[95],"presented":[96],"by":[97],"imbalanced":[98],"small-scale":[100],"datasets":[101,116],"through":[102],"methods.":[105],"Experiments":[106],"on":[107],"CoNLL":[109],"2003":[110],"NER,":[111],"Chinese":[112],"OEI,":[113],"YACLC":[115],"demonstrated":[117],"algorithm's":[119],"efficiency,":[120],"achieving":[121],"up":[122],"100.04%":[124],"expert-only":[127],"baseline":[128,146],"<inline-formula":[129,147],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[130,148],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[131,149],"notation=\"LaTeX\">$\\textrm":[132,150],"{F}$</tex-math></inline-formula>-score":[133,151],"65.97%":[135],"cost":[136,154],"savings.":[137,155],"A":[138],"dataset-independent":[139],"experiment":[140],"yielded":[141],"97.56%":[142],"expert":[145],"59.88%":[153],"We":[156],"also":[157],"theoretical":[160],"analysis":[161],"proving":[162],"framework":[166],"achieves":[167],"sub-linear":[168],"regret.":[169]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
