{"id":"https://openalex.org/W2164479068","doi":"https://doi.org/10.3115/v1/w14-3356","title":"Crowdsourcing High-Quality Parallel Data Extraction from Twitter","display_name":"Crowdsourcing High-Quality Parallel Data Extraction from Twitter","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2164479068","doi":"https://doi.org/10.3115/v1/w14-3356","mag":"2164479068"},"language":"en","primary_location":{"id":"doi:10.3115/v1/w14-3356","is_oa":true,"landing_page_url":"http://doi.org/10.3115/v1/w14-3356","pdf_url":"https://doi.org/10.3115/v1/w14-3356","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Ninth Workshop on Statistical Machine Translation","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.3115/v1/w14-3356","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Wang Ling","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang Ling","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047686430","display_name":"Lu\u00eds Marujo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luis Marujo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111222692","display_name":"Chris Dyer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chris Dyer","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107337645","display_name":"Alan W. Black","orcid":"https://orcid.org/0000-0001-8820-8831"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alan W Black","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5054590402","display_name":"Isabel Trancoso","orcid":"https://orcid.org/0000-0001-5874-6313"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Isabel Trancoso","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.6515,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.95093802,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.9049190282821655},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7833106517791748},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5756723284721375},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.4736279845237732},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.45939135551452637},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4296163320541382},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.4148736298084259},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.39338481426239014},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.33213818073272705},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.25426316261291504},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07136803865432739},{"id":"https://openalex.org/keywords/medline","display_name":"MEDLINE","score":0.05863240361213684}],"concepts":[{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.9049190282821655},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7833106517791748},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5756723284721375},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.4736279845237732},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.45939135551452637},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4296163320541382},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.4148736298084259},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.39338481426239014},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.33213818073272705},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.25426316261291504},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07136803865432739},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.05863240361213684},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3115/v1/w14-3356","is_oa":true,"landing_page_url":"http://doi.org/10.3115/v1/w14-3356","pdf_url":"https://doi.org/10.3115/v1/w14-3356","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Ninth Workshop on Statistical Machine Translation","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.686.8574","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.686.8574","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://anthology.aclweb.org/W/W14/W14-3356.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.3115/v1/w14-3356","is_oa":true,"landing_page_url":"http://doi.org/10.3115/v1/w14-3356","pdf_url":"https://doi.org/10.3115/v1/w14-3356","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Ninth Workshop on Statistical Machine Translation","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.8100000023841858,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2164479068.pdf","grobid_xml":"https://content.openalex.org/works/W2164479068.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W62473634","https://openalex.org/W129342743","https://openalex.org/W170711724","https://openalex.org/W1489181569","https://openalex.org/W1775899913","https://openalex.org/W2047295649","https://openalex.org/W2049774111","https://openalex.org/W2101105183","https://openalex.org/W2104595351","https://openalex.org/W2104907655","https://openalex.org/W2105673178","https://openalex.org/W2124807415","https://openalex.org/W2127876534","https://openalex.org/W2134800885","https://openalex.org/W2142523187","https://openalex.org/W2143927888","https://openalex.org/W2145262681","https://openalex.org/W2146574666","https://openalex.org/W2163361328","https://openalex.org/W2164806437","https://openalex.org/W2270190199","https://openalex.org/W2401082558","https://openalex.org/W2495359443","https://openalex.org/W3166956191"],"related_works":["https://openalex.org/W3032998312","https://openalex.org/W1503094549","https://openalex.org/W4384486036","https://openalex.org/W135177976","https://openalex.org/W2337920774","https://openalex.org/W4318823662","https://openalex.org/W2886410948","https://openalex.org/W2135768893","https://openalex.org/W176219849","https://openalex.org/W3021414116"],"abstract_inverted_index":{"High-quality":[0],"parallel":[1,25,41,75,119],"data":[2,26],"is":[3,28,91],"crucial":[4],"for":[5],"a":[6,45,112,118],"range":[7],"of":[8,63,87,111],"multilingual":[9],"applications,":[10],"from":[11,58],"tuning":[12,110],"and":[13,116],"evaluating":[14],"machine":[15],"translation":[16],"systems":[17],"to":[18,34,103,126],"cross-lingual":[19],"annotation":[20],"pro-jection.":[21],"Unfortunately,":[22],"automatically":[23,60],"ob-tained":[24],"(which":[27],"available":[29],"in":[30,48,77,108,136],"relative":[31],"abundance)":[32],"tends":[33],"be":[35,133],"quite":[36],"noisy.":[37],"To":[38],"obtain":[39],"high-quality":[40],"data,":[42],"we":[43,71],"introduce":[44],"crowdsourcing":[46],"paradigm":[47],"which":[49],"workers":[50],"with":[51,122],"only":[52],"basic":[53],"bilin-gual":[54],"proficiency":[55],"identify":[56],"translations":[57],"an":[59,100],"extracted":[61],"corpus":[62,90],"par-allel":[64],"microblog":[65,113],"messages.":[66],"For":[67],"less":[68],"than":[69,94],"$350,":[70],"obtained":[72],"over":[73],"5000":[74],"seg-ments":[76],"five":[78],"language":[79],"pairs.":[80],"Evaluated":[81],"against":[82],"expert":[83,104],"annotations,":[84],"the":[85,88],"quality":[86],"crowdsourced":[89,130],"significantly":[92],"better":[93],"existing":[95],"automatic":[96],"methods:":[97],"it":[98,123],"obtains":[99],"performance":[101],"comparable":[102],"annotations":[105],"when":[106],"used":[107],"MERT":[109],"MT":[114],"system;":[115],"training":[117],"sentence":[120],"classifier":[121],"leads":[124],"also":[125],"improved":[127],"results.":[128],"The":[129],"corpora":[131],"will":[132],"made":[134],"avail-able":[135]},"counts_by_year":[{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":2}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
