{"id":"https://openalex.org/W7081566372","doi":"https://doi.org/10.48550/arxiv.2509.08920","title":"Documents Are People and Words Are Items: A Psychometric Approach to Textual Data with Contextual Embeddings","display_name":"Documents Are People and Words Are Items: A Psychometric Approach to Textual Data with Contextual Embeddings","publication_year":2025,"publication_date":"2025-09-10","ids":{"openalex":"https://openalex.org/W7081566372","doi":"https://doi.org/10.48550/arxiv.2509.08920"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2509.08920","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.08920","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2509.08920","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Chen, Jinsong","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Jinsong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T12157","display_name":"Geochemistry and Geologic Mapping","score":0.6840999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12157","display_name":"Geochemistry and Geologic Mapping","score":0.6840999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13067","display_name":"Geological Modeling and Analysis","score":0.029200000688433647,"subfield":{"id":"https://openalex.org/subfields/1906","display_name":"Geochemistry and Petrology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14311","display_name":"Electrical and Electromagnetic Research","score":0.01759999990463257,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4738999903202057},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4602000117301941},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4309999942779541},{"id":"https://openalex.org/keywords/exploratory-factor-analysis","display_name":"Exploratory factor analysis","score":0.42089998722076416},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.39070001244544983},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.3840000033378601},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.3407000005245209},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3391000032424927}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7250999808311462},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7026000022888184},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5410000085830688},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4738999903202057},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4602000117301941},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4309999942779541},{"id":"https://openalex.org/C165957694","wikidata":"https://www.wikidata.org/wiki/Q5421350","display_name":"Exploratory factor analysis","level":3,"score":0.42089998722076416},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.39070001244544983},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.3840000033378601},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.3407000005245209},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3391000032424927},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.33160001039505005},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.32580000162124634},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.30239999294281006},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2996000051498413},{"id":"https://openalex.org/C71611378","wikidata":"https://www.wikidata.org/wiki/Q5165191","display_name":"Contextual design","level":3,"score":0.29170000553131104},{"id":"https://openalex.org/C170133592","wikidata":"https://www.wikidata.org/wiki/Q1806883","display_name":"Latent semantic analysis","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.28610000014305115},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C3018260909","wikidata":"https://www.wikidata.org/wiki/Q1322871","display_name":"Exploratory analysis","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.25200000405311584},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2509.08920","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.08920","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2509.08920","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.08920","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.8521273136138916,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0,163],"research":[1],"introduces":[2],"a":[3,45,67],"novel":[4],"psychometric":[5,32,47,80,169],"method":[6],"for":[7,31,178],"analyzing":[8],"textual":[9,25,161,172,184],"data":[10,26,29,173],"using":[11],"large":[12],"language":[13,89],"models.":[14],"By":[15],"leveraging":[16],"contextual":[17,21,56,76,103],"embeddings":[18],"to":[19,97,121,141,153],"create":[20],"scores,":[22],"we":[23,86,109],"transform":[24],"into":[27],"response":[28],"suitable":[30],"analysis.":[33,81],"Treating":[34],"documents":[35,65],"as":[36,40,187],"individuals":[37],"and":[38,78,92,101,118,123,130,158,190],"words":[39,135],"items,":[41],"this":[42],"approach":[43,164],"provides":[44],"natural":[46,88],"interpretation":[48],"under":[49],"the":[50,83,106,132,142,150,168],"assumption":[51],"that":[52],"certain":[53],"keywords,":[54],"whose":[55],"meanings":[57],"vary":[58],"significantly":[59],"across":[60],"documents,":[61],"can":[62],"effectively":[63],"differentiate":[64],"within":[66,160],"corpus.":[68],"The":[69],"modeling":[70],"process":[71],"comprises":[72],"two":[73],"stages:":[74],"obtaining":[75],"scores":[77],"performing":[79],"In":[82,105],"first":[84],"stage,":[85,108],"utilize":[87],"processing":[90],"techniques":[91],"encoder":[93],"based":[94],"transformer":[95],"models":[96],"identify":[98,131],"common":[99],"keywords":[100],"generate":[102],"scores.":[104],"second":[107],"employ":[110],"various":[111],"types":[112],"of":[113,171],"factor":[114,128],"analysis,":[115],"including":[116],"exploratory":[117],"bifactor":[119],"models,":[120],"extract":[122],"define":[124],"latent":[125,155],"factors,":[126],"determine":[127],"correlations,":[129],"most":[133],"significant":[134],"associated":[136],"with":[137],"each":[138],"factor.":[139],"Applied":[140],"Wiki":[143],"STEM":[144],"corpus,":[145],"our":[146],"experimental":[147],"results":[148],"demonstrate":[149],"method's":[151],"potential":[152],"uncover":[154],"knowledge":[156],"dimensions":[157],"patterns":[159],"data.":[162],"not":[165],"only":[166],"enhances":[167],"analysis":[170],"but":[174],"also":[175],"holds":[176],"promise":[177],"applications":[179],"in":[180,183],"fields":[181],"rich":[182],"information,":[185],"such":[186],"education,":[188],"psychology,":[189],"law.":[191]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
