{"id":"https://openalex.org/W2784233715","doi":"https://doi.org/10.1109/bigdata.2017.8258275","title":"Why-Diff: Explaining differences amongst similar workflow runs by exploiting scientific metadata","display_name":"Why-Diff: Explaining differences amongst similar workflow runs by exploiting scientific metadata","publication_year":2017,"publication_date":"2017-12-01","ids":{"openalex":"https://openalex.org/W2784233715","doi":"https://doi.org/10.1109/bigdata.2017.8258275","mag":"2784233715"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2017.8258275","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2017.8258275","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068813254","display_name":"Priyaa Thavasimani","orcid":"https://orcid.org/0000-0002-4846-1661"},"institutions":[{"id":"https://openalex.org/I84884186","display_name":"Newcastle University","ror":"https://ror.org/01kj2bm70","country_code":"GB","type":"education","lineage":["https://openalex.org/I84884186"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Priyaa Thavasimani","raw_affiliation_strings":["School of Computing Science, Newcastle University, UK"],"affiliations":[{"raw_affiliation_string":"School of Computing Science, Newcastle University, UK","institution_ids":["https://openalex.org/I84884186"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052190130","display_name":"Jacek Ca\u0142a","orcid":"https://orcid.org/0000-0002-8322-4370"},"institutions":[{"id":"https://openalex.org/I84884186","display_name":"Newcastle University","ror":"https://ror.org/01kj2bm70","country_code":"GB","type":"education","lineage":["https://openalex.org/I84884186"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jacek Cala","raw_affiliation_strings":["School of Computing Science, Newcastle University, UK"],"affiliations":[{"raw_affiliation_string":"School of Computing Science, Newcastle University, UK","institution_ids":["https://openalex.org/I84884186"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018918066","display_name":"Paolo Missier","orcid":"https://orcid.org/0000-0002-0978-2446"},"institutions":[{"id":"https://openalex.org/I84884186","display_name":"Newcastle University","ror":"https://ror.org/01kj2bm70","country_code":"GB","type":"education","lineage":["https://openalex.org/I84884186"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Paolo Missier","raw_affiliation_strings":["School of Computing Science, Newcastle University, UK"],"affiliations":[{"raw_affiliation_string":"School of Computing Science, Newcastle University, UK","institution_ids":["https://openalex.org/I84884186"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5068813254"],"corresponding_institution_ids":["https://openalex.org/I84884186"],"apc_list":null,"apc_paid":null,"fwci":1.2647,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.89026472,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"7525","issue":null,"first_page":"3031","last_page":"3041"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9819999933242798,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.9191717505455017},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.8217242360115051},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8170448541641235},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.7488976716995239},{"id":"https://openalex.org/keywords/workflow-engine","display_name":"Workflow engine","score":0.5205743908882141},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4983351230621338},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4547008275985718},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.32467395067214966},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2913912534713745},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2511715292930603},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.23158997297286987}],"concepts":[{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.9191717505455017},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.8217242360115051},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8170448541641235},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.7488976716995239},{"id":"https://openalex.org/C188220564","wikidata":"https://www.wikidata.org/wiki/Q3325097","display_name":"Workflow engine","level":3,"score":0.5205743908882141},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4983351230621338},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4547008275985718},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32467395067214966},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2913912534713745},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2511715292930603},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.23158997297286987}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/bigdata.2017.8258275","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2017.8258275","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/06413964-3a95-4057-b259-e86118007eda","is_oa":false,"landing_page_url":"https://research.manchester.ac.uk/en/publications/06413964-3a95-4057-b259-e86118007eda","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Thavasimani, P, Cala, J & Missier, P 2017, 'Why-Diff : Explaining differences amongst similar workflow runs by exploiting scientific metadata', Paper presented at 2017 IEEE International Conference on Big Data (Big Data), Boston, United States, 11/12/17 - 14/12/17 pp. 3031-3041. https://doi.org/10.1109/BigData.2017.8258275","raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/ff2a70eb-4fcd-4c19-979e-0eae43823587","is_oa":false,"landing_page_url":"https://research.birmingham.ac.uk/en/publications/ff2a70eb-4fcd-4c19-979e-0eae43823587","pdf_url":null,"source":{"id":"https://openalex.org/S4306402634","display_name":"University of Birmingham Research Portal (University of Birmingham)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79619799","host_organization_name":"University of Birmingham","host_organization_lineage":["https://openalex.org/I79619799"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Thavasimani , P , Cala , J & Missier , P 2017 , Why-Diff : Explaining differences amongst similar workflow runs by exploiting scientific metadata . in J-Y Nie , Z Obradovic , T Suzumura , R Ghosh , R Nambiar , C Wang , H Zang , R Baeza-Yates , R Baeza-Yates , X Hu , J Kepner , A Cuzzocrea , J Tang & M Toyoda (eds) , Proceedings - 2017 IEEE International Conference on Big Data, Big Data 2017 . Proceedings - 2017 IEEE International Conference on Big Data, Big Data 2017 , vol. 2018-January , Institute of Electrical and Electronics Engineers (IEEE) , pp. 3031-3041 , 5th IEEE International Conference on Big Data, Big Data 2017 , Boston , United States , 11/12/17 . https://doi.org/10.1109/BigData.2017.8258275","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.atira.dk:publications/06413964-3a95-4057-b259-e86118007eda","is_oa":false,"landing_page_url":"http://ieeexplore.ieee.org/document/8258275/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Thavasimani, P, Cala, J & Missier, P 2017, 'Why-Diff : Explaining differences amongst similar workflow runs by exploiting scientific metadata', Paper presented at 2017 IEEE International Conference on Big Data (Big Data), Boston, United States, 11/12/17 - 14/12/17 pp. 3031-3041. https://doi.org/10.1109/BigData.2017.8258275","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1575885106","https://openalex.org/W1608781114","https://openalex.org/W1814568923","https://openalex.org/W1978802375","https://openalex.org/W1979461729","https://openalex.org/W1983833794","https://openalex.org/W2027172230","https://openalex.org/W2045005060","https://openalex.org/W2072876893","https://openalex.org/W2079336662","https://openalex.org/W2082256810","https://openalex.org/W2096435507","https://openalex.org/W2102207586","https://openalex.org/W2116783139","https://openalex.org/W2130126144","https://openalex.org/W2131966673","https://openalex.org/W2135729589","https://openalex.org/W2142188478","https://openalex.org/W2142493985","https://openalex.org/W2145154883","https://openalex.org/W2172143128","https://openalex.org/W2221287604","https://openalex.org/W2257736573","https://openalex.org/W2346143774","https://openalex.org/W2563001386","https://openalex.org/W2574046987","https://openalex.org/W2585397740","https://openalex.org/W2604713509","https://openalex.org/W4254560685","https://openalex.org/W4321508270","https://openalex.org/W6679469509","https://openalex.org/W6731351910","https://openalex.org/W6735926645"],"related_works":["https://openalex.org/W2356038787","https://openalex.org/W2368797771","https://openalex.org/W2356466503","https://openalex.org/W2352774566","https://openalex.org/W2370201914","https://openalex.org/W2054854316","https://openalex.org/W1998651692","https://openalex.org/W2107963358","https://openalex.org/W2373656887","https://openalex.org/W2049143847"],"abstract_inverted_index":{"Majority":[0],"of":[1,11,14,48,51,69,96,109],"workflows":[2,18],"executed":[3],"nowadays":[4],"need":[5],"to":[6,76,92],"process":[7],"a":[8,36,41,70,106,119],"massive":[9],"amount":[10],"data.":[12],"Re-execution":[13],"such":[15,39],"dataintensive":[16],"scientific":[17,37],"often":[19],"results":[20,49,83],"in":[21,127],"different":[22,82],"outputs.":[23],"Scientific":[24,87],"research":[25],"progresses":[26],"when":[27],"discoveries":[28],"are":[29],"reproduced":[30],"and":[31],"verified.":[32],"However,":[33],"simply":[34],"re-enacting":[35],"computation,":[38],"as":[40],"workflow,":[42],"does":[43],"not":[44],"guarantee":[45],"the":[46,59,64,67,79,94,100,110],"correctness":[47],"because":[50],"unintentional":[52],"changes":[53],"that":[54,66,99],"may":[55,102],"have":[56,103],"interfered":[57],"with":[58],"re-enactment":[60],"process.":[61],"We":[62,112],"investigate":[63],"hypothesis":[65],"metadata":[68,88,125],"workflow":[71],"execution":[72],"can":[73,89],"be":[74,90],"used":[75,91],"explain":[77],"why":[78],"experimenter":[80,101],"observes":[81],"(cause":[84],"analysis).":[85],"Similarly,":[86],"determine":[93],"impact":[95],"intentional":[97],"variations":[98],"injected":[104],"into":[105],"new":[107],"version":[108],"workflow.":[111],"explore":[113],"these":[114],"two":[115,124,134],"complementary":[116],"cases":[117],"using":[118],"simple":[120],"algorithm":[121],"for":[122],"traversing":[123],"traces":[126],"lock-step":[128],"mode,":[129],"which":[130],"we":[131],"illustrate":[132],"through":[133],"human":[135],"genomics":[136],"data":[137],"analysis":[138],"workflows.":[139]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
