{"id":"https://openalex.org/W2008127487","doi":"https://doi.org/10.1145/2483669.2483676","title":"Paraphrase acquisition via crowdsourcing and machine learning","display_name":"Paraphrase acquisition via crowdsourcing and machine learning","publication_year":2013,"publication_date":"2013-06-01","ids":{"openalex":"https://openalex.org/W2008127487","doi":"https://doi.org/10.1145/2483669.2483676","mag":"2008127487"},"language":"en","primary_location":{"id":"doi:10.1145/2483669.2483676","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2483669.2483676","pdf_url":null,"source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042650748","display_name":"Steven Burrows","orcid":null},"institutions":[{"id":"https://openalex.org/I51441396","display_name":"Bauhaus-Universit\u00e4t Weimar","ror":"https://ror.org/033bb5z47","country_code":"DE","type":"education","lineage":["https://openalex.org/I51441396"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Steven Burrows","raw_affiliation_strings":["Bauhaus-Universit\u00e4t Weimar, Germany","[Bauhaus Universitat Weimar, Germany]"],"affiliations":[{"raw_affiliation_string":"Bauhaus-Universit\u00e4t Weimar, Germany","institution_ids":["https://openalex.org/I51441396"]},{"raw_affiliation_string":"[Bauhaus Universitat Weimar, Germany]","institution_ids":["https://openalex.org/I51441396"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083712311","display_name":"Martin Potthast","orcid":"https://orcid.org/0000-0003-2451-0665"},"institutions":[{"id":"https://openalex.org/I51441396","display_name":"Bauhaus-Universit\u00e4t Weimar","ror":"https://ror.org/033bb5z47","country_code":"DE","type":"education","lineage":["https://openalex.org/I51441396"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Martin Potthast","raw_affiliation_strings":["Bauhaus-Universit\u00e4t Weimar, Germany","[Bauhaus Universitat Weimar, Germany]"],"affiliations":[{"raw_affiliation_string":"Bauhaus-Universit\u00e4t Weimar, Germany","institution_ids":["https://openalex.org/I51441396"]},{"raw_affiliation_string":"[Bauhaus Universitat Weimar, Germany]","institution_ids":["https://openalex.org/I51441396"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027915931","display_name":"Benno Stein","orcid":"https://orcid.org/0000-0001-9033-2217"},"institutions":[{"id":"https://openalex.org/I51441396","display_name":"Bauhaus-Universit\u00e4t Weimar","ror":"https://ror.org/033bb5z47","country_code":"DE","type":"education","lineage":["https://openalex.org/I51441396"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Benno Stein","raw_affiliation_strings":["Bauhaus-Universit\u00e4t Weimar, Germany","[Bauhaus Universitat Weimar, Germany]"],"affiliations":[{"raw_affiliation_string":"Bauhaus-Universit\u00e4t Weimar, Germany","institution_ids":["https://openalex.org/I51441396"]},{"raw_affiliation_string":"[Bauhaus Universitat Weimar, Germany]","institution_ids":["https://openalex.org/I51441396"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5042650748"],"corresponding_institution_ids":["https://openalex.org/I51441396"],"apc_list":null,"apc_paid":null,"fwci":18.4867,"has_fulltext":false,"cited_by_count":93,"citation_normalized_percentile":{"value":0.99152484,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"4","issue":"3","first_page":"1","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/paraphrase","display_name":"Paraphrase","score":0.9830946922302246},{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.910215437412262},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8002031445503235},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6729249358177185},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6238276958465576},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5735183954238892},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4235169291496277},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41364413499832153},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2441374957561493}],"concepts":[{"id":"https://openalex.org/C2780922921","wikidata":"https://www.wikidata.org/wiki/Q255189","display_name":"Paraphrase","level":2,"score":0.9830946922302246},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.910215437412262},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8002031445503235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6729249358177185},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6238276958465576},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5735183954238892},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4235169291496277},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41364413499832153},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2441374957561493},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2483669.2483676","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2483669.2483676","pdf_url":null,"source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.302.9398","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.302.9398","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.uni-weimar.de/medien/webis/publications/papers/stein_2013za.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.639.5323","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.639.5323","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.uni-weimar.de/medien/webis/publications/papers/stein_2013c.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320308943","display_name":"Microsoft Research","ror":"https://ror.org/00d0nc645"},{"id":"https://openalex.org/F4320316798","display_name":"Minnesota Soybean Research and Promotion Council","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W30768373","https://openalex.org/W86855850","https://openalex.org/W131533222","https://openalex.org/W599327533","https://openalex.org/W1488833649","https://openalex.org/W1514017746","https://openalex.org/W1522553155","https://openalex.org/W1544505227","https://openalex.org/W1554365104","https://openalex.org/W1566376227","https://openalex.org/W1947252915","https://openalex.org/W1966119171","https://openalex.org/W1967043694","https://openalex.org/W1970381522","https://openalex.org/W1974336599","https://openalex.org/W1990524510","https://openalex.org/W2031842255","https://openalex.org/W2051593977","https://openalex.org/W2056655097","https://openalex.org/W2060376762","https://openalex.org/W2093219119","https://openalex.org/W2099884836","https://openalex.org/W2101105183","https://openalex.org/W2103081392","https://openalex.org/W2113342230","https://openalex.org/W2119465309","https://openalex.org/W2127008633","https://openalex.org/W2127849236","https://openalex.org/W2129444086","https://openalex.org/W2129468719","https://openalex.org/W2133075018","https://openalex.org/W2133182690","https://openalex.org/W2133798162","https://openalex.org/W2138810035","https://openalex.org/W2142120379","https://openalex.org/W2143539737","https://openalex.org/W2150740657","https://openalex.org/W2151401338","https://openalex.org/W2164290393","https://openalex.org/W2166245164","https://openalex.org/W2169813772","https://openalex.org/W2170989440","https://openalex.org/W2250305120","https://openalex.org/W2892737606","https://openalex.org/W3017975492","https://openalex.org/W3105439152","https://openalex.org/W6603477831"],"related_works":["https://openalex.org/W2978707643","https://openalex.org/W4378713476","https://openalex.org/W2294233724","https://openalex.org/W2169813772","https://openalex.org/W2736149021","https://openalex.org/W2007563177","https://openalex.org/W4248451614","https://openalex.org/W4310803295","https://openalex.org/W1973985309","https://openalex.org/W3132357981"],"abstract_inverted_index":{"To":[0],"paraphrase":[1,35,221],"means":[2,72],"to":[3,34,129,196,207,284,328,338],"rewrite":[4],"content":[5],"while":[6,320],"preserving":[7],"the":[8,26,62,73,82,99,104,136,148,183,193,198,267,285,294,308,314,329,336,340],"original":[9],"meaning.":[10],"Paraphrasing":[11],"is":[12,65,76,87,317,335],"important":[13],"in":[14,20,107,125,191,215,272],"fields":[15],"such":[16,70],"as":[17],"text":[18],"reuse":[19],"journalism,":[21],"anonymizing":[22],"work,":[23],"and":[24,37,53,79,109,164,189,224,236,277,342],"improving":[25],"quality":[27,67],"of":[28,56,61,84,93,103,127,147,252,281,326],"customer-written":[29],"reviews.":[30],"This":[31,155,245],"article":[32],"contributes":[33],"acquisition":[36,50,55],"focuses":[38],"on":[39],"two":[40],"aspects":[41],"that":[42,101,168,227,248,266,313],"are":[43,123],"not":[44,77],"addressed":[45],"by":[46],"current":[47],"research:":[48],"(1)":[49],"via":[51],"crowdsourcing,":[52],"(2)":[54],"passage-level":[57,158,210],"samples.":[58],"The":[59,95,200,331],"challenge":[60],"first":[63,337],"aspect":[64,97],"automatic":[66],"assurance;":[68],"without":[69,80],"a":[71,216,228,273,289],"crowdsourcing":[74,81],"paradigm":[75],"effective,":[78],"creation":[83],"test":[85],"corpora":[86],"unacceptably":[88],"expensive":[89],"for":[90,132,176,307],"realistic":[91],"order":[92],"magnitudes.":[94],"second":[96],"addresses":[98],"deficit":[100],"most":[102],"previous":[105],"work":[106,332],"generating":[108],"evaluating":[110],"paraphrases":[111,117,159,211],"has":[112],"been":[113],"conducted":[114],"using":[115,172,220],"sentence-level":[116],"or":[118],"shorter;":[119],"these":[120],"short-sample":[121],"analyses":[122],"limited":[124],"terms":[126],"application":[128],"plagiarism":[130,152,343],"detection,":[131],"example.":[133],"We":[134],"present":[135],"Webis":[137],"Crowd":[138],"Paraphrase":[139],"Corpus":[140],"2011":[141],"(Webis-CPC-11),":[142],"which":[143],"recently":[144],"formed":[145],"part":[146],"PAN":[149,187],"2010":[150],"international":[151],"detection":[153],"competition.":[154],"corpus":[156,291,301],"comprises":[157],"with":[160,239],"4067":[161],"positive":[162],"samples":[163,167,238,254],"3792":[165],"negative":[166],"failed":[169],"our":[170,253,262],"criteria,":[171],"Amazon's":[173],"Mechanical":[174],"Turk":[175],"crowdsourcing.":[177],"In":[178],"this":[179],"article,":[180],"we":[181,225,269,311],"review":[182],"lessons":[184],"learned":[185],"at":[186,242],"2010,":[188],"explain":[190],"detail":[192],"method":[194],"used":[195],"construct":[197],"corpus.":[199],"empirical":[201],"contributions":[202],"include":[203],"machine":[204],"learning":[205],"experiments":[206],"explore":[208],"if":[209],"can":[212,231],"be":[213,256],"identified":[214],"two-class":[217],"classification":[218],"problem":[219],"similarity":[222],"features,":[223],"find":[226],"k-nearest-neighbor":[229],"classifier":[230],"correctly":[232],"distinguish":[233],"between":[234],"paraphrased":[235],"nonparaphrased":[237],"0.980":[240],"precision":[241],"0.523":[243],"recall.":[244],"result":[246],"implies":[247],"just":[249],"under":[250],"half":[251],"must":[255],"discarded":[257],"(remaining":[258],"0.477":[259],"fraction),":[260],"but":[261],"cost":[263,318],"analysis":[264],"shows":[265],"automation":[268],"introduce":[270],"results":[271],"18%":[274],"financial":[275,315],"saving":[276],"over":[278,323],"100":[279],"hours":[280,325],"time":[282,327],"returned":[283],"researchers":[286],"when":[287,297],"repeating":[288],"similar":[290],"design.":[292],"On":[293],"other":[295],"hand,":[296],"building":[298],"an":[299],"unrelated":[300],"requiring,":[302],"say,":[303],"25%":[304],"training":[305],"data":[306],"automated":[309],"component,":[310],"show":[312],"outcome":[316],"neutral,":[319],"still":[321],"returning":[322],"70":[324],"researchers.":[330],"presented":[333],"here":[334],"join":[339],"paraphrasing":[341],"communities.":[344]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":8},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":12},{"year":2013,"cited_by_count":10},{"year":2012,"cited_by_count":4}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
