{"id":"https://openalex.org/W2782976553","doi":"https://doi.org/10.1109/bigdata.2017.8258222","title":"Data quality challenges with missing values and mixed types in joint sequence analysis","display_name":"Data quality challenges with missing values and mixed types in joint sequence analysis","publication_year":2017,"publication_date":"2017-12-01","ids":{"openalex":"https://openalex.org/W2782976553","doi":"https://doi.org/10.1109/bigdata.2017.8258222","mag":"2782976553"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2017.8258222","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2017.8258222","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056799505","display_name":"Alina Lazar","orcid":"https://orcid.org/0000-0002-2096-1541"},"institutions":[{"id":"https://openalex.org/I161203489","display_name":"Youngstown State University","ror":"https://ror.org/038zf2n28","country_code":"US","type":"education","lineage":["https://openalex.org/I161203489"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Alina Lazar","raw_affiliation_strings":["Department of Computer Science and Information Systems, Youngstown State University, Youngstown, OH"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Information Systems, Youngstown State University, Youngstown, OH","institution_ids":["https://openalex.org/I161203489"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102964480","display_name":"Ling Jin","orcid":"https://orcid.org/0000-0002-4381-195X"},"institutions":[{"id":"https://openalex.org/I4210087188","display_name":"Impact Assessment","ror":"https://ror.org/004m2d892","country_code":"US","type":"company","lineage":["https://openalex.org/I4210087188"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ling Jin","raw_affiliation_strings":["Energy Analysis and Environmental Impacts Division, LBNL, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Energy Analysis and Environmental Impacts Division, LBNL, Berkeley, CA","institution_ids":["https://openalex.org/I4210087188"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084595934","display_name":"C. Anna Spurlock","orcid":"https://orcid.org/0000-0002-8573-661X"},"institutions":[{"id":"https://openalex.org/I4210087188","display_name":"Impact Assessment","ror":"https://ror.org/004m2d892","country_code":"US","type":"company","lineage":["https://openalex.org/I4210087188"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C. Anna Spurlock","raw_affiliation_strings":["Energy Analysis and Environmental Impacts Division, LBNL, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Energy Analysis and Environmental Impacts Division, LBNL, Berkeley, CA","institution_ids":["https://openalex.org/I4210087188"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043129695","display_name":"Kesheng Wu","orcid":"https://orcid.org/0000-0002-6907-3393"},"institutions":[{"id":"https://openalex.org/I4210087188","display_name":"Impact Assessment","ror":"https://ror.org/004m2d892","country_code":"US","type":"company","lineage":["https://openalex.org/I4210087188"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kesheng Wu","raw_affiliation_strings":["Energy Analysis and Environmental Impacts Division, LBNL, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Energy Analysis and Environmental Impacts Division, LBNL, Berkeley, CA","institution_ids":["https://openalex.org/I4210087188"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068293431","display_name":"Alex Sim","orcid":"https://orcid.org/0000-0002-6295-1982"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alex Sim","raw_affiliation_strings":["Computational Research Division, LBNL, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Computational Research Division, LBNL, Berkeley, CA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5056799505"],"corresponding_institution_ids":["https://openalex.org/I161203489"],"apc_list":null,"apc_paid":null,"fwci":0.9246,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.77133789,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"2620","last_page":"2627"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11901","display_name":"Bayesian Methods and Mixture Models","score":0.9656000137329102,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9559999704360962,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6513972878456116},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.5895437598228455},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.588078498840332},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5423704981803894},{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.5417014956474304},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3836354613304138},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.1355101466178894},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09850943088531494}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6513972878456116},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.5895437598228455},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.588078498840332},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5423704981803894},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.5417014956474304},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3836354613304138},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.1355101466178894},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09850943088531494},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2017.8258222","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2017.8258222","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.8100000023841858,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W190517705","https://openalex.org/W1540089290","https://openalex.org/W1893982075","https://openalex.org/W1928823211","https://openalex.org/W1948664808","https://openalex.org/W1970026646","https://openalex.org/W1977356453","https://openalex.org/W1983381361","https://openalex.org/W1998415639","https://openalex.org/W2026858305","https://openalex.org/W2040605549","https://openalex.org/W2041181884","https://openalex.org/W2053474901","https://openalex.org/W2062123184","https://openalex.org/W2074231493","https://openalex.org/W2091050592","https://openalex.org/W2097747115","https://openalex.org/W2099689252","https://openalex.org/W2106595237","https://openalex.org/W2114741326","https://openalex.org/W2144746259","https://openalex.org/W2161078209","https://openalex.org/W2161375627","https://openalex.org/W2184767146","https://openalex.org/W2188484099","https://openalex.org/W2327620288","https://openalex.org/W2343219532","https://openalex.org/W2398004122","https://openalex.org/W2521564860","https://openalex.org/W2622560707","https://openalex.org/W4285719527","https://openalex.org/W6686631463","https://openalex.org/W6739032777"],"related_works":["https://openalex.org/W4380150146","https://openalex.org/W3024870410","https://openalex.org/W2410652950","https://openalex.org/W4283773154","https://openalex.org/W3139174110","https://openalex.org/W4289597203","https://openalex.org/W2085630472","https://openalex.org/W1977098485","https://openalex.org/W4285201053","https://openalex.org/W2753779043"],"abstract_inverted_index":{"The":[0,112],"goal":[1],"of":[2,10,41,70,131,151,158,205],"this":[3],"paper":[4],"is":[5,34,61,123,188],"to":[6,25,64,114,183],"investigate":[7,155],"the":[8,65,71,116,128,156,181,191,195,203,214],"impact":[9,157],"missing":[11,77,185,207,215],"values":[12,171,208],"in":[13,30,38,162,190],"categorical":[14,66,98,129],"time":[15],"series":[16],"sequences":[17],"on":[18,58,109],"common":[19],"data":[20,33,56,84,91,121,146,186],"analysis":[21],"tasks.":[22],"Being":[23],"able":[24],"more":[26],"effectively":[27],"identify":[28],"patterns":[29],"socio-demographic":[31],"longitudinal":[32,89,100],"an":[35,136],"important":[36],"component":[37],"a":[39,93,124,163],"number":[40],"social":[42],"science":[43],"settings.":[44],"However,":[45],"performing":[46],"fundamental":[47],"analytical":[48],"operations,":[49],"such":[50],"as":[51],"clustering":[52,106],"for":[53,97],"grouping":[54],"these":[55,83,160],"based":[57],"similarity":[59,94,117,165],"patterns,":[60],"challenging":[62],"due":[63],"and":[67,73,78,99,118,172],"multi-dimensional":[68],"nature":[69,130],"data,":[72,101,133],"their":[74],"corruption":[75],"by":[76,202,212],"inconsistent":[79],"values.":[80],"To":[81],"study":[82],"quality":[85],"issues,":[86],"we":[87,134,154,178],"employ":[88,135],"sequence":[90],"representations,":[92],"measure":[95],"designed":[96],"together":[102],"with":[103,169,174],"state-of-the":[104],"art":[105],"methodologies":[107],"reliant":[108],"hierarchical":[110],"algorithms.":[111],"key":[113],"quantifying":[115],"difference":[119],"among":[120],"records":[122],"distance":[125,139],"metric.":[126],"Given":[127],"our":[132],"\u201cedit\u201d":[137],"type":[138],"using":[140],"Optimal":[141],"Matching":[142],"(OM).":[143],"Because":[144],"each":[145],"record":[147],"has":[148],"multiple":[149,175],"variables":[150,161,168],"different":[152],"types,":[153],"mixing":[159],"single":[164],"measure.":[166],"Between":[167],"binary":[170,196],"those":[173],"nominal":[176,192],"values,":[177],"find":[179],"that":[180],"ability":[182],"overcome":[184],"problems":[187],"harder":[189],"domain":[193],"versus":[194],"domain.":[197],"Additionally,":[198],"artificial":[199],"clusters":[200],"introduced":[201],"alignment":[204],"leading":[206],"can":[209],"be":[210],"resolved":[211],"tuning":[213],"value":[216],"substitution":[217],"cost":[218],"parameter.":[219]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
