{"id":"https://openalex.org/W4417010789","doi":"https://doi.org/10.48550/arxiv.2512.02910","title":"In Silico Development of Psychometric Scales: Feasibility of Representative Population Data Simulation with LLMs","display_name":"In Silico Development of Psychometric Scales: Feasibility of Representative Population Data Simulation with LLMs","publication_year":2025,"publication_date":"2025-12-02","ids":{"openalex":"https://openalex.org/W4417010789","doi":"https://doi.org/10.48550/arxiv.2512.02910"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2512.02910","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.02910","pdf_url":"https://arxiv.org/pdf/2512.02910","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2512.02910","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080689761","display_name":"Enrico Cipriani","orcid":"https://orcid.org/0000-0001-6690-6591"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cipriani, Enrico","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043207629","display_name":"Pavel Okopnyi","orcid":"https://orcid.org/0000-0001-7034-2733"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Okopnyi, Pavel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023422621","display_name":"Danilo Menicucci","orcid":"https://orcid.org/0000-0002-5521-4108"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Menicucci, Danilo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020841194","display_name":"Simone Grassini","orcid":"https://orcid.org/0000-0002-4189-7585"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grassini, Simone","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5080689761"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.3000999987125397,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.3000999987125397,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10467","display_name":"Psychometric Methodologies and Testing","score":0.15039999783039093,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13283","display_name":"Mental Health Research Topics","score":0.10339999943971634,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6197999715805054},{"id":"https://openalex.org/keywords/in-silico","display_name":"In silico","score":0.4862000048160553},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.46860000491142273},{"id":"https://openalex.org/keywords/measurement-invariance","display_name":"Measurement invariance","score":0.4237000048160553},{"id":"https://openalex.org/keywords/psychometrics","display_name":"Psychometrics","score":0.4142000079154968},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.4009999930858612},{"id":"https://openalex.org/keywords/experimental-data","display_name":"Experimental data","score":0.3776000142097473},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.3684999942779541},{"id":"https://openalex.org/keywords/latent-variable","display_name":"Latent variable","score":0.3515999913215637}],"concepts":[{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6197999715805054},{"id":"https://openalex.org/C2775905019","wikidata":"https://www.wikidata.org/wiki/Q192572","display_name":"In silico","level":3,"score":0.4862000048160553},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47589999437332153},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.46860000491142273},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4652000069618225},{"id":"https://openalex.org/C1589151","wikidata":"https://www.wikidata.org/wiki/Q6804207","display_name":"Measurement invariance","level":4,"score":0.4237000048160553},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4230000078678131},{"id":"https://openalex.org/C171606756","wikidata":"https://www.wikidata.org/wiki/Q506132","display_name":"Psychometrics","level":2,"score":0.4142000079154968},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.4009999930858612},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3833000063896179},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.3776000142097473},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.3684999942779541},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.36010000109672546},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3562000095844269},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.3515999913215637},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.3449999988079071},{"id":"https://openalex.org/C87007009","wikidata":"https://www.wikidata.org/wiki/Q210832","display_name":"Statistical hypothesis testing","level":2,"score":0.32739999890327454},{"id":"https://openalex.org/C198477413","wikidata":"https://www.wikidata.org/wiki/Q7647069","display_name":"Survey data collection","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C19875794","wikidata":"https://www.wikidata.org/wiki/Q1207340","display_name":"Item response theory","level":3,"score":0.29919999837875366},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.296099990606308},{"id":"https://openalex.org/C65965080","wikidata":"https://www.wikidata.org/wiki/Q1806885","display_name":"Latent variable model","level":3,"score":0.2831000089645386},{"id":"https://openalex.org/C40722632","wikidata":"https://www.wikidata.org/wiki/Q5160137","display_name":"Confirmatory factor analysis","level":3,"score":0.28290000557899475},{"id":"https://openalex.org/C3019813237","wikidata":"https://www.wikidata.org/wiki/Q65089264","display_name":"Model validation","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2768000066280365},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.2703000009059906},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.26930001378059387},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C71104824","wikidata":"https://www.wikidata.org/wiki/Q1476639","display_name":"Structural equation modeling","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.25429999828338623}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2512.02910","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.02910","pdf_url":"https://arxiv.org/pdf/2512.02910","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2512.02910","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.02910","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2512.02910","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.02910","pdf_url":"https://arxiv.org/pdf/2512.02910","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Developing":[0],"and":[1,11,69,101,125,129,150,153,160,209,212],"validating":[2],"psychometric":[3,193,219],"scales":[4,95],"requires":[5],"large":[6],"samples,":[7],"multiple":[8],"testing":[9],"phases,":[10],"substantial":[12,146],"resources.":[13],"Recent":[14],"advances":[15],"in":[16,43,89,120,157,217],"Large":[17],"Language":[18],"Models":[19],"(LLMs)":[20],"enable":[21],"the":[22,66,116,137],"generation":[23],"of":[24,37,72,122,207],"synthetic":[25,151],"participant":[26],"data":[27,48,81,100,174,187,210],"by":[28],"prompting":[29],"models":[30],"to":[31,108,216],"answer":[32],"items":[33],"while":[34,163],"impersonating":[35],"individuals":[36],"specific":[38],"demographic":[39],"profiles,":[40],"potentially":[41],"allowing":[42],"silico":[44,218],"piloting":[45],"before":[46],"real":[47,83,149],"collection.":[49],"Across":[50],"four":[51,123],"preregistered":[52],"studies":[53,124],"(N":[54],"=":[55],"circa":[56],"300":[57],"each),":[58],"we":[59,78,92],"tested":[60],"whether":[61,104],"LLM-simulated":[62],"datasets":[63,84,114,177],"can":[64],"reproduce":[65],"latent":[67,167],"structures":[68,106,119],"measurement":[70],"properties":[71],"human":[73,111],"responses.":[74],"In":[75],"Studies":[76,90],"1-2,":[77],"compared":[79],"LLM-generated":[80,186],"with":[82,132],"for":[85,136,190,199],"two":[86,138],"validated":[87],"scales;":[88],"3-4,":[91],"created":[93],"new":[94],"using":[96],"EFA":[97],"on":[98],"simulated":[99],"then":[102],"examined":[103],"these":[105],"generalized":[107],"newly":[109,139],"collected":[110],"samples.":[112],"Simulated":[113,176],"replicated":[115],"intended":[117],"factor":[118],"three":[121],"showed":[126,179],"consistent":[127],"configural":[128],"metric":[130],"invariance,":[131],"scalar":[133],"invariance":[134,182],"achieved":[135],"developed":[140],"scales.":[141],"However,":[142],"correlation-based":[143],"tests":[144],"revealed":[145],"differences":[147],"between":[148],"datasets,":[152],"notable":[154],"discrepancies":[155],"appeared":[156],"score":[158],"distributions":[159],"variances.":[161],"Thus,":[162],"LLMs":[164],"capture":[165],"group-level":[166,192],"structures,":[168],"they":[169],"do":[170],"not":[171,196],"approximate":[172],"individual-level":[173,200],"properties.":[175],"also":[178],"full":[180],"internal":[181],"across":[183],"gender.":[184],"Overall,":[185],"appear":[188],"useful":[189],"early-stage,":[191],"prototyping,":[194],"but":[195],"as":[197],"substitutes":[198],"validation.":[201],"We":[202],"discuss":[203],"methodological":[204],"limitations,":[205],"risks":[206],"bias":[208],"pollution,":[211],"ethical":[213],"considerations":[214],"related":[215],"simulations.":[220]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-12-04T00:00:00"}
