{"id":"https://openalex.org/W4377140150","doi":"https://doi.org/10.1145/3597305","title":"Pipeline Design for Data Preparation for Social Media Analysis","display_name":"Pipeline Design for Data Preparation for Social Media Analysis","publication_year":2023,"publication_date":"2023-05-20","ids":{"openalex":"https://openalex.org/W4377140150","doi":"https://doi.org/10.1145/3597305"},"language":"en","primary_location":{"id":"doi:10.1145/3597305","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3597305","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3597305","source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3597305","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027164522","display_name":"Carlo Alberto Bono","orcid":"https://orcid.org/0000-0002-5734-1274"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Carlo A. Bono","raw_affiliation_strings":["Dept. of Electronics, Information, and Bioengineering, Politecnico di Milano, Italy"],"affiliations":[{"raw_affiliation_string":"Dept. of Electronics, Information, and Bioengineering, Politecnico di Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063621424","display_name":"Cinzia Cappiello","orcid":"https://orcid.org/0000-0001-6062-5174"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Cinzia Cappiello","raw_affiliation_strings":["Dept. of Electronics, Information, and Bioengineering, Politecnico di Milano, Italy"],"affiliations":[{"raw_affiliation_string":"Dept. of Electronics, Information, and Bioengineering, Politecnico di Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046308244","display_name":"Barbara Pernici","orcid":"https://orcid.org/0000-0002-2034-9774"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Barbara Pernici","raw_affiliation_strings":["Dept. of Electronics, Information, and Bioengineering, Politecnico di Milano, Italy"],"affiliations":[{"raw_affiliation_string":"Dept. of Electronics, Information, and Bioengineering, Politecnico di Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079266183","display_name":"Edoardo Ramalli","orcid":"https://orcid.org/0000-0002-5124-9047"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Edoardo Ramalli","raw_affiliation_strings":["Dept. of Electronics, Information, and Bioengineering, Politecnico di Milano, Italy"],"affiliations":[{"raw_affiliation_string":"Dept. of Electronics, Information, and Bioengineering, Politecnico di Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010941159","display_name":"Monica Vitali","orcid":"https://orcid.org/0000-0002-5258-1893"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Monica Vitali","raw_affiliation_strings":["Dept. of Electronics, Information, and Bioengineering, Politecnico di Milano, Italy"],"affiliations":[{"raw_affiliation_string":"Dept. of Electronics, Information, and Bioengineering, Politecnico di Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5027164522"],"corresponding_institution_ids":["https://openalex.org/I93860229"],"apc_list":null,"apc_paid":null,"fwci":2.8284,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.92649162,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"15","issue":"4","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9789000153541565,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8852000832557678},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.8127726912498474},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.8038412928581238},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6092941761016846},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.5827922821044922},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.5285795331001282},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5061954855918884},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.4865335524082184},{"id":"https://openalex.org/keywords/data-analysis","display_name":"Data analysis","score":0.45545026659965515},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.4440566599369049},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4132075309753418},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3791238069534302},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.27070140838623047},{"id":"https://openalex.org/keywords/service","display_name":"Service (business)","score":0.08859321475028992}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8852000832557678},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.8127726912498474},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.8038412928581238},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6092941761016846},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5827922821044922},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.5285795331001282},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5061954855918884},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.4865335524082184},{"id":"https://openalex.org/C175801342","wikidata":"https://www.wikidata.org/wiki/Q1988917","display_name":"Data analysis","level":2,"score":0.45545026659965515},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.4440566599369049},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4132075309753418},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3791238069534302},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.27070140838623047},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.08859321475028992},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C136264566","wikidata":"https://www.wikidata.org/wiki/Q159810","display_name":"Economy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3597305","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3597305","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3597305","source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"},{"id":"pmh:oai:re.public.polimi.it:11311/1235292","is_oa":true,"landing_page_url":"https://hdl.handle.net/11311/1235292","pdf_url":"https://re.public.polimi.it/bitstream/11311/1235292/1/Data_Analysis_Pipeline_JDIQ__preprint.pdf","source":{"id":"https://openalex.org/S4306400312","display_name":"Virtual Community of Pathological Anatomy (University of Castilla La Mancha)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79189158","host_organization_name":"University of Castilla-La Mancha","host_organization_lineage":["https://openalex.org/I79189158"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1145/3597305","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3597305","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3597305","source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.800000011920929}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320332999","display_name":"Horizon 2020 Framework Programme","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4377140150.pdf","grobid_xml":"https://content.openalex.org/works/W4377140150.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W1984243510","https://openalex.org/W2108598243","https://openalex.org/W2166202924","https://openalex.org/W2302501749","https://openalex.org/W2732026016","https://openalex.org/W2766945679","https://openalex.org/W2774938489","https://openalex.org/W2794512438","https://openalex.org/W2964514192","https://openalex.org/W2990714382","https://openalex.org/W2995822546","https://openalex.org/W3035874221","https://openalex.org/W3132433578","https://openalex.org/W3132668218","https://openalex.org/W3163948101","https://openalex.org/W3185884151","https://openalex.org/W3185956072","https://openalex.org/W3199096880","https://openalex.org/W3212368439","https://openalex.org/W4200535642","https://openalex.org/W4237775487","https://openalex.org/W4243044468","https://openalex.org/W4290057560","https://openalex.org/W6646426903","https://openalex.org/W6735926645"],"related_works":["https://openalex.org/W4226266853","https://openalex.org/W4210252074","https://openalex.org/W3092201768","https://openalex.org/W2796632413","https://openalex.org/W2740083192","https://openalex.org/W4382315681","https://openalex.org/W2794907032","https://openalex.org/W4255802207","https://openalex.org/W3123352720","https://openalex.org/W2462007151"],"abstract_inverted_index":{"In":[0,82,128],"a":[1,38,46,69,133,148,164,196,237,251,259],"data-driven":[2],"culture,":[3],"in":[4,195,270],"which":[5],"analytics":[6],"applications":[7],"are":[8,143],"the":[9,15,51,57,78,94,102,108,118,138,152,155,171,182,188,192,205,222,226,242,246,256],"main":[10],"resources":[11],"for":[12,101,104,112,117,151,167,213,261],"supporting":[13],"decision-making,":[14],"use":[16],"of":[17,44,120,140,154,157,191,201,240,258,264],"high-quality":[18],"datasets":[19,263],"is":[20,48,64,229],"mandatory":[21],"to":[22,34,77,93,96,136,145,204,235,272],"minimize":[23],"errors":[24],"and":[25,116,169,177,217],"risks.":[26],"For":[27],"this":[28,129,220],"reason,":[29],"data":[30,39,52,158,172,183,215],"analysis":[31,156],"tasks":[32,207],"need":[33,95],"be":[35],"preceded":[36],"by":[37,67,245],"preparation":[40,173],"pipeline.":[41],"The":[42,199],"design":[43,139,257],"such":[45],"pipeline":[47,174,227,247,260],"not":[49,74],"trivial:":[50],"analyst":[53,184],"must":[54],"carefully":[55],"choose":[56],"appropriate":[58],"operations":[59],"considering":[60],"several":[61],"aspects.":[62],"This":[63],"often":[65],"performed":[66,244],"adopting":[68],"trial-and-error":[70],"approach":[71,135],"that":[72,142],"does":[73],"always":[75],"lead":[76],"most":[79],"effective":[80],"solution.":[81],"addition,":[83],"extracting":[84],"information":[85,186],"from":[86,107,159,225,267],"social":[87,160,268],"media":[88,269],"poses":[89],"specific":[90],"problems":[91],"due":[92],"consider":[97,255],"only":[98],"posts":[99,124],"relevant":[100,149],"analysis,":[103],"its":[105,113],"dependence":[106],"context":[109],"being":[110],"considered,":[111],"multimedia":[114],"contents,":[115],"risk":[119],"filtering":[121],"out":[122],"informative":[123],"with":[125,175,232],"automatic":[126],"filters.":[127],"article,":[130],"we":[131,254],"propose":[132],"systematic":[134],"support":[137],"pipelines":[141],"able":[144],"effectively":[146],"extract":[147],"dataset":[150,194,223],"goal":[153],"media.":[161],"We":[162],"provide":[163],"conceptual":[165],"model":[166],"designing":[168],"annotating":[170],"quality":[176,190],"performance":[178],"information,":[179],"thus":[180],"providing":[181],"preliminary":[185],"on":[187,248],"expected":[189],"resulting":[193,224],"context-aware":[197],"manner.":[198],"generation":[200],"metadata":[202,234],"related":[203],"processing":[206],"has":[208],"been":[209],"recognized":[210],"as":[211],"essential":[212],"enabling":[214],"sharing":[216],"reusability.":[218],"To":[219],"aim,":[221],"application":[228],"automatically":[230],"annotated":[231],"provenance":[233],"get":[236],"detailed":[238],"description":[239],"all":[241],"activities":[243],"them.":[249],"As":[250],"case":[252],"study,":[253],"creating":[262],"images":[265],"extracted":[266],"order":[271],"analyze":[273],"behavioural":[274],"aspects":[275],"during":[276],"COVID-19.":[277]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
