{"id":"https://openalex.org/W3093524452","doi":"https://doi.org/10.1007/s41019-020-00144-y","title":"Parrot: A Progressive Analysis System on Large Text Collections","display_name":"Parrot: A Progressive Analysis System on Large Text Collections","publication_year":2020,"publication_date":"2020-10-22","ids":{"openalex":"https://openalex.org/W3093524452","doi":"https://doi.org/10.1007/s41019-020-00144-y","mag":"3093524452"},"language":"en","primary_location":{"id":"doi:10.1007/s41019-020-00144-y","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s41019-020-00144-y","pdf_url":"https://link.springer.com/content/pdf/10.1007/s41019-020-00144-y.pdf","source":{"id":"https://openalex.org/S2486411021","display_name":"Data Science and Engineering","issn_l":"2364-1185","issn":["2364-1185","2364-1541"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Science and Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://link.springer.com/content/pdf/10.1007/s41019-020-00144-y.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102915156","display_name":"Yazhong Zhang","orcid":"https://orcid.org/0000-0002-5144-1928"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yazhong Zhang","raw_affiliation_strings":["School of Computer Science, Fudan University, Shanghai, China","Shanghai Key Laboratory of Data Science, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114859774","display_name":"Hanbing Zhang","orcid":"https://orcid.org/0000-0003-4987-932X"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hanbing Zhang","raw_affiliation_strings":["School of Computer Science, Fudan University, Shanghai, China","Shanghai Key Laboratory of Data Science, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059000467","display_name":"Zhenying He","orcid":"https://orcid.org/0000-0002-2926-4814"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenying He","raw_affiliation_strings":["School of Computer Science, Fudan University, Shanghai, China","Shanghai Key Laboratory of Data Science, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087411191","display_name":"Yinan Jing","orcid":"https://orcid.org/0000-0002-1169-8032"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yinan Jing","raw_affiliation_strings":["School of Computer Science, Fudan University, Shanghai, China","Shanghai Key Laboratory of Data Science, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100323896","display_name":"Kai Zhang","orcid":"https://orcid.org/0000-0001-7518-5466"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Zhang","raw_affiliation_strings":["School of Computer Science, Fudan University, Shanghai, China","Shanghai Key Laboratory of Data Science, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, Shanghai, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072790845","display_name":"X. Sean Wang","orcid":"https://orcid.org/0000-0002-9059-3713"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]},{"id":"https://openalex.org/I4210120250","display_name":"Shanghai Technical Institute of Electronics & Information","ror":"https://ror.org/01wh3jw63","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210120250"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"X. Sean Wang","raw_affiliation_strings":["School of Computer Science, Fudan University, Shanghai, China","Shanghai Institute of Intelligent Electronics and Systems, Shanghai, China","Shanghai Key Laboratory of Data Science, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"Shanghai Institute of Intelligent Electronics and Systems, Shanghai, China","institution_ids":["https://openalex.org/I4210120250"]},{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, Shanghai, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5087411191"],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":null,"apc_paid":null,"fwci":0.81,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.76011044,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"6","issue":"1","first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8882335424423218},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.6453776955604553},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6264355182647705},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5913457870483398},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4209536015987396},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.34487491846084595},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.28101640939712524}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8882335424423218},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.6453776955604553},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6264355182647705},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5913457870483398},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4209536015987396},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34487491846084595},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.28101640939712524},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s41019-020-00144-y","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s41019-020-00144-y","pdf_url":"https://link.springer.com/content/pdf/10.1007/s41019-020-00144-y.pdf","source":{"id":"https://openalex.org/S2486411021","display_name":"Data Science and Engineering","issn_l":"2364-1185","issn":["2364-1185","2364-1541"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Science and Engineering","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:4e4b232a153c4ce3babe9b8960b67da3","is_oa":true,"landing_page_url":"https://doaj.org/article/4e4b232a153c4ce3babe9b8960b67da3","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data Science and Engineering, Vol 6, Iss 1, Pp 1-19 (2020)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s41019-020-00144-y","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s41019-020-00144-y","pdf_url":"https://link.springer.com/content/pdf/10.1007/s41019-020-00144-y.pdf","source":{"id":"https://openalex.org/S2486411021","display_name":"Data Science and Engineering","issn_l":"2364-1185","issn":["2364-1185","2364-1541"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Science and Engineering","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7851391746","display_name":"\u9762\u5411\u590d\u6742\u67e5\u8be2\u7684\u5f02\u8d28\u5a92\u4f53\u641c\u7d22\u7406\u8bba\u4e0e\u65b9\u6cd5\u7814\u7a76","funder_award_id":"61732004","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8118681200","display_name":null,"funder_award_id":"2018YFB1402600","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W3093524452.pdf"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W105408422","https://openalex.org/W1973382888","https://openalex.org/W1978455388","https://openalex.org/W2002791618","https://openalex.org/W2024562752","https://openalex.org/W2031692264","https://openalex.org/W2071989194","https://openalex.org/W2074489032","https://openalex.org/W2081315591","https://openalex.org/W2110363867","https://openalex.org/W2117897510","https://openalex.org/W2120176508","https://openalex.org/W2138759702","https://openalex.org/W2139276812","https://openalex.org/W2161768947","https://openalex.org/W2296677182","https://openalex.org/W2438261314","https://openalex.org/W2483942986","https://openalex.org/W2512646345","https://openalex.org/W2731887386","https://openalex.org/W2732582420","https://openalex.org/W2795530455","https://openalex.org/W2810329283","https://openalex.org/W2891345706","https://openalex.org/W2946789774","https://openalex.org/W2949432502","https://openalex.org/W2950416404","https://openalex.org/W2952451116","https://openalex.org/W3100965700","https://openalex.org/W4242587584","https://openalex.org/W4244777963","https://openalex.org/W4248027902","https://openalex.org/W4251436587"],"related_works":["https://openalex.org/W4390608645","https://openalex.org/W2960264696","https://openalex.org/W3090563135","https://openalex.org/W2497432351","https://openalex.org/W4206777497","https://openalex.org/W2766461310","https://openalex.org/W4247566972","https://openalex.org/W4388692845","https://openalex.org/W3202731209","https://openalex.org/W3211874991"],"abstract_inverted_index":{"Abstract":[0],"The":[1,32,76],"size":[2],"of":[3,21,30,90,118,138],"textual":[4,37],"data":[5,38,144],"continues":[6],"to":[7,55,101,122,145,157],"grow":[8],"along":[9],"with":[10,27],"the":[11,19,28,119,165,170],"need":[12],"for":[13,48,93],"timely":[14],"and":[15,44,85,108,141],"cost-effective":[16],"analysis,":[17],"while":[18,164],"growth":[20,29],"computation":[22],"power":[23],"cannot":[24],"keep":[25],"up":[26],"data.":[31],"delays":[33],"when":[34],"processing":[35,66],"huge":[36],"can":[39],"negatively":[40],"impact":[41],"user":[42],"activity":[43],"insight.":[45],"This":[46],"calls":[47],"a":[49,63,88,94,98,103,111,115,159],"paradigm":[50],"shift":[51],"from":[52],"blocking":[53],"fashion":[54],"progressive":[56,65,99],"processing.":[57],"In":[58],"this":[59,129],"paper,":[60],"we":[61,113],"propose":[62],"sample-based":[64],"model":[67,77],"that":[68,151],"focuses":[69],"on":[70,74,80,136],"term":[71],"frequency":[72],"calculation":[73],"text.":[75],"is":[78,154],"based":[79],"an":[81],"incremental":[82],"execution":[83],"engine":[84],"will":[86],"calculate":[87],"series":[89],"approximate":[91],"results":[92,172],"single":[95],"query":[96],"in":[97,131],"way":[100],"provide":[102],"smooth":[104],"trade-off":[105],"between":[106],"accuracy":[107],"latency.":[109],"As":[110],"part,":[112],"proposed":[114],"new":[116],"variant":[117],"bootstrap":[120],"technique":[121],"quantify":[123],"result":[124,160],"error":[125,163],"progressively.":[126],"We":[127],"implemented":[128],"method":[130,153],"our":[132,152],"system":[133],"called":[134],"Parrot":[135],"top":[137],"Apache":[139],"Spark":[140],"used":[142],"real-world":[143],"test":[146],"its":[147],"performance.":[148],"Experiments":[149],"demonstrate":[150],"2.4\u00d7\u201319.7\u00d7":[155],"faster":[156],"get":[158],"within":[161],"1%":[162],"confidence":[166],"interval":[167],"always":[168],"covers":[169],"accurate":[171],"very":[173],"well.":[174]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2026-06-06T09:05:17.133730","created_date":"2025-10-10T00:00:00"}
