{"id":"https://openalex.org/W4405437285","doi":"https://doi.org/10.1021/acs.jcim.4c01766","title":"Classification-Based Detection and Quantification of Cross-Domain Data Bias in Materials Discovery","display_name":"Classification-Based Detection and Quantification of Cross-Domain Data Bias in Materials Discovery","publication_year":2024,"publication_date":"2024-12-16","ids":{"openalex":"https://openalex.org/W4405437285","doi":"https://doi.org/10.1021/acs.jcim.4c01766","pmid":"https://pubmed.ncbi.nlm.nih.gov/39681303"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.4c01766","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.4c01766","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008644832","display_name":"Giovanni Trezza","orcid":"https://orcid.org/0000-0003-0601-6292"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Politecnico di Torino","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Giovanni Trezza","raw_affiliation_strings":["Department of Energy, Politecnico di Torino, C.so Duca degli Abruzzi 24, Torino 10129, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Energy, Politecnico di Torino, C.so Duca degli Abruzzi 24, Torino 10129, Italy","institution_ids":["https://openalex.org/I177477856"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071497805","display_name":"Eliodoro Chiavazzo","orcid":"https://orcid.org/0000-0001-6165-7434"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Politecnico di Torino","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Eliodoro Chiavazzo","raw_affiliation_strings":["Department of Energy, Politecnico di Torino, C.so Duca degli Abruzzi 24, Torino 10129, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Energy, Politecnico di Torino, C.so Duca degli Abruzzi 24, Torino 10129, Italy","institution_ids":["https://openalex.org/I177477856"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5071497805"],"corresponding_institution_ids":["https://openalex.org/I177477856"],"apc_list":null,"apc_paid":null,"fwci":0.926,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.77281438,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"65","issue":"4","first_page":"1747","last_page":"1761"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7572507858276367},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5326632261276245},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.5258203744888306},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5093713998794556},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48802605271339417},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.47646641731262207},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.46716344356536865},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.4573572874069214},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4437776803970337},{"id":"https://openalex.org/keywords/applicability-domain","display_name":"Applicability domain","score":0.44088977575302124},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4323660135269165},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.4113403856754303},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10986840724945068}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7572507858276367},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5326632261276245},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.5258203744888306},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5093713998794556},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48802605271339417},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47646641731262207},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.46716344356536865},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4573572874069214},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4437776803970337},{"id":"https://openalex.org/C107908354","wikidata":"https://www.wikidata.org/wiki/Q4781456","display_name":"Applicability domain","level":3,"score":0.44088977575302124},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4323660135269165},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.4113403856754303},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10986840724945068},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C164126121","wikidata":"https://www.wikidata.org/wiki/Q766383","display_name":"Quantitative structure\u2013activity relationship","level":2,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0}],"mesh":[{"descriptor_ui":"D000074266","descriptor_name":"Materials Science","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015982","descriptor_name":"Bias","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015982","descriptor_name":"Bias","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1021/acs.jcim.4c01766","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.4c01766","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:39681303","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39681303","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5099999904632568,"display_name":"Climate action","id":"https://metadata.un.org/sdg/13"}],"awards":[{"id":"https://openalex.org/G3913891125","display_name":null,"funder_award_id":"1561","funder_id":"https://openalex.org/F4320331528","funder_display_name":"Ministero dell'Universit\u00e0 e della Ricerca"}],"funders":[{"id":"https://openalex.org/F4320331528","display_name":"Ministero dell'Universit\u00e0 e della Ricerca","ror":null},{"id":"https://openalex.org/F7311471023","display_name":"NextGenerationEU","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W567627125","https://openalex.org/W1545632247","https://openalex.org/W1983815543","https://openalex.org/W1984228539","https://openalex.org/W1988372059","https://openalex.org/W1997054659","https://openalex.org/W1998875346","https://openalex.org/W2031342017","https://openalex.org/W2034935747","https://openalex.org/W2035375194","https://openalex.org/W2151807416","https://openalex.org/W2338621248","https://openalex.org/W2464725281","https://openalex.org/W2472866839","https://openalex.org/W2728789193","https://openalex.org/W2751488329","https://openalex.org/W2803629276","https://openalex.org/W2804431384","https://openalex.org/W2902648622","https://openalex.org/W2902694017","https://openalex.org/W2937056002","https://openalex.org/W2947210186","https://openalex.org/W2963899413","https://openalex.org/W2968071222","https://openalex.org/W2972219719","https://openalex.org/W2972597827","https://openalex.org/W2975845070","https://openalex.org/W2999615587","https://openalex.org/W3005497310","https://openalex.org/W3005735954","https://openalex.org/W3013008289","https://openalex.org/W3016970897","https://openalex.org/W3033499467","https://openalex.org/W3042553076","https://openalex.org/W3098905070","https://openalex.org/W3100710928","https://openalex.org/W3101568640","https://openalex.org/W3103934428","https://openalex.org/W3113317199","https://openalex.org/W3128429991","https://openalex.org/W3158878314","https://openalex.org/W3169577313","https://openalex.org/W3177828909","https://openalex.org/W3177983580","https://openalex.org/W3181860256","https://openalex.org/W4242458481","https://openalex.org/W4250042253","https://openalex.org/W4281479001","https://openalex.org/W4289766776","https://openalex.org/W4293567371","https://openalex.org/W4303648596","https://openalex.org/W4312112856","https://openalex.org/W4362700002","https://openalex.org/W4381328264","https://openalex.org/W4383215346","https://openalex.org/W4385490607","https://openalex.org/W4388845378","https://openalex.org/W4389132751","https://openalex.org/W4389407755","https://openalex.org/W4390701915","https://openalex.org/W4390940921","https://openalex.org/W4400681177"],"related_works":["https://openalex.org/W2142085049","https://openalex.org/W2076018148","https://openalex.org/W2140798747","https://openalex.org/W2948169060","https://openalex.org/W2730112582","https://openalex.org/W3203840987","https://openalex.org/W2110696645","https://openalex.org/W2358580169","https://openalex.org/W2111347279","https://openalex.org/W4399426197"],"abstract_inverted_index":{"It":[0],"stands":[1],"to":[2,29,55,64,185,192,202,211,219],"reason":[3],"that":[4,130],"the":[5,8,32,66,77,96,105,154,204],"amount":[6],"and":[7,61,114,132,147,167,174,189,213],"quality":[9],"of":[10,13,59,88,95,156,172,206],"data":[11,53,117,134],"are":[12,119,160],"key":[14],"importance":[15],"for":[16,86,152],"setting":[17],"up":[18],"accurate":[19],"artificial":[20],"intelligence":[21],"(AI)-driven":[22],"models.":[23],"Among":[24],"others,":[25],"a":[26,46,51,57,72,127,157,164],"fundamental":[27],"aspect":[28,102],"consider":[30],"is":[31,42,48,168],"bias":[33],"introduced":[34],"during":[35],"sample":[36],"selection":[37],"in":[38],"database":[39],"generation.":[40],"This":[41,182],"particularly":[43],"relevant":[44],"when":[45,110,209],"model":[47,80,159],"trained":[49],"on":[50,139],"specialized":[52],"set":[54],"predict":[56],"property":[58,68],"interest":[60],"then":[62],"applied":[63,210],"forecast":[65],"same":[67],"over":[69],"samples":[70,93],"having":[71],"completely":[73],"different":[74],"genesis.":[75],"Indeed,":[76],"resulting":[78],"biased":[79],"will":[81],"likely":[82,161],"produce":[83],"unreliable":[84],"predictions":[85,155],"many":[87],"those":[89,149],"out-of-the-box":[90,150],"samples,":[91],"i.e.,":[92],"out":[94],"training":[97],"set.":[98],"Neglecting":[99],"such":[100],"an":[101],"may":[103],"hinder":[104],"AI-based":[106],"discovery":[107],"process,":[108],"even":[109],"high-quality,":[111],"sufficiently":[112],"large,":[113],"highly":[115],"reputable":[116],"sources":[118],"available.":[120],"To":[121],"address":[122],"this":[123],"challenge,":[124],"we":[125],"propose":[126],"new":[128],"method":[129],"detects":[131],"quantifies":[133],"bias,":[135],"reducing":[136],"its":[137],"impact":[138],"materials":[140,151,176,223],"discovery.":[141,224],"Our":[142],"approach,":[143],"aimed":[144],"at":[145],"identifying":[146],"excluding":[148],"which":[153],"pretrained":[158],"unreliable,":[162],"leverages":[163],"classification":[165],"strategy":[166],"validated":[169],"by":[170],"means":[171],"superconductor":[173],"thermoelectric":[175],"as":[177],"two":[178],"representative":[179],"case":[180],"studies.":[181],"methodology,":[183],"designed":[184],"be":[186],"simple,":[187],"flexible,":[188],"easily":[190],"adaptable":[191],"any":[193],"architecture,":[194],"including":[195],"modern":[196],"graph":[197],"equivariant":[198],"neural":[199],"networks,":[200],"aims":[201],"enhance":[203],"reliability":[205],"AI":[207],"models":[208],"diverse":[212],"previously":[214],"unseen":[215],"materials,":[216],"thereby":[217],"contributing":[218],"more":[220],"reliable":[221],"AI-driven":[222]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
