{"id":"https://openalex.org/W7147553664","doi":"https://doi.org/10.48550/arxiv.2603.26930","title":"In your own words: computationally identifying interpretable themes in free-text survey data","display_name":"In your own words: computationally identifying interpretable themes in free-text survey data","publication_year":2026,"publication_date":"2026-03-27","ids":{"openalex":"https://openalex.org/W7147553664","doi":"https://doi.org/10.48550/arxiv.2603.26930"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.26930","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26930","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.26930","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132631891","display_name":"Jenny S Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Jenny S","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066788160","display_name":"Aliya Saperstein","orcid":"https://orcid.org/0000-0002-6429-1172"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saperstein, Aliya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132697566","display_name":"Emma Pierson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pierson, Emma","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5132631891"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11539","display_name":"Survey Methodology and Nonresponse","score":0.7339000105857849,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11539","display_name":"Survey Methodology and Nonresponse","score":0.7339000105857849,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12892","display_name":"Social Power and Status Dynamics","score":0.025800000876188278,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.019500000402331352,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.8123000264167786},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.5677000284194946},{"id":"https://openalex.org/keywords/survey-data-collection","display_name":"Survey data collection","score":0.5414999723434448},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.5307000279426575},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.436599999666214},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.430400013923645},{"id":"https://openalex.org/keywords/qualitative-property","display_name":"Qualitative property","score":0.39570000767707825},{"id":"https://openalex.org/keywords/qualitative-research","display_name":"Qualitative research","score":0.37310001254081726}],"concepts":[{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.8123000264167786},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.5677000284194946},{"id":"https://openalex.org/C198477413","wikidata":"https://www.wikidata.org/wiki/Q7647069","display_name":"Survey data collection","level":2,"score":0.5414999723434448},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.5307000279426575},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5062999725341797},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5013999938964844},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4781000018119812},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.436599999666214},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.430400013923645},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4090999960899353},{"id":"https://openalex.org/C87156501","wikidata":"https://www.wikidata.org/wiki/Q7268708","display_name":"Qualitative property","level":2,"score":0.39570000767707825},{"id":"https://openalex.org/C190248442","wikidata":"https://www.wikidata.org/wiki/Q839486","display_name":"Qualitative research","level":2,"score":0.37310001254081726},{"id":"https://openalex.org/C85973986","wikidata":"https://www.wikidata.org/wiki/Q1091731","display_name":"Exploratory research","level":2,"score":0.3537999987602234},{"id":"https://openalex.org/C2776307086","wikidata":"https://www.wikidata.org/wiki/Q1165905","display_name":"Sexual identity","level":3,"score":0.33390000462532043},{"id":"https://openalex.org/C173481278","wikidata":"https://www.wikidata.org/wiki/Q7257997","display_name":"Survey research","level":2,"score":0.33239999413490295},{"id":"https://openalex.org/C2780977526","wikidata":"https://www.wikidata.org/wiki/Q42417149","display_name":"Data exploration","level":3,"score":0.3212999999523163},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.3172000050544739},{"id":"https://openalex.org/C73945780","wikidata":"https://www.wikidata.org/wiki/Q814232","display_name":"Survey methodology","level":2,"score":0.3140999972820282},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.3109000027179718},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.30809998512268066},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.2964000105857849},{"id":"https://openalex.org/C54998920","wikidata":"https://www.wikidata.org/wiki/Q16255102","display_name":"Self-report study","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2863999903202057},{"id":"https://openalex.org/C2777997956","wikidata":"https://www.wikidata.org/wiki/Q17888","display_name":"Sexual orientation","level":2,"score":0.2840000092983246},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.28060001134872437},{"id":"https://openalex.org/C3019221131","wikidata":"https://www.wikidata.org/wiki/Q55621107","display_name":"Lived experience","level":2,"score":0.2736999988555908},{"id":"https://openalex.org/C3018260909","wikidata":"https://www.wikidata.org/wiki/Q1322871","display_name":"Exploratory analysis","level":2,"score":0.25870001316070557},{"id":"https://openalex.org/C2991839931","wikidata":"https://www.wikidata.org/wiki/Q48264","display_name":"Gender identity","level":2,"score":0.2583000063896179},{"id":"https://openalex.org/C75630572","wikidata":"https://www.wikidata.org/wiki/Q538904","display_name":"Applied psychology","level":1,"score":0.2581999897956848}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.26930","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26930","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.26930","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26930","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7263134121894836,"id":"https://metadata.un.org/sdg/5","display_name":"Gender equality"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Free-text":[0],"survey":[1,34,101,182],"responses":[2],"can":[3,105,174],"provide":[4],"nuance":[5],"often":[6],"missed":[7],"by":[8,90,114],"structured":[9,107],"questions,":[10],"but":[11],"remain":[12],"difficult":[13],"to":[14,57,109,111,184],"statistically":[15],"analyze.":[16],"To":[17,47],"address":[18],"this,":[19],"we":[20,54],"introduce":[21],"In":[22],"Your":[23],"Own":[24],"Words,":[25],"a":[26,58,178],"computational":[27,92],"framework":[28,173],"for":[29],"exploratory":[30],"analyses":[31],"of":[32,51,61,64,162,181],"free-text":[33,42,62],"data":[35],"that":[36,126,164],"identifies":[37],"structured,":[38],"interpretable":[39,86,186],"themes":[40,75,95,134,151,187],"in":[41,100,143,177],"responses,":[43],"facilitating":[44],"systematic":[45,153],"analysis.":[46],"illustrate":[48],"the":[49,133,150],"benefits":[50],"this":[52,80],"approach,":[53],"apply":[55],"it":[56],"new":[59],"dataset":[60,81],"descriptions":[63],"race,":[65],"gender,":[66],"and":[67,85,122,146,157],"sexual":[68],"orientation":[69],"from":[70,188],"1,004":[71],"U.S.":[72],"participants.":[73],"The":[74,94],"our":[76,172],"approach":[77],"produces":[78],"on":[79],"are":[82],"more":[83],"coherent":[84],"than":[87],"those":[88],"produced":[89],"past":[91],"methods.":[93,194],"have":[96],"three":[97],"practical":[98],"applications":[99],"research.":[102],"First,":[103],"they":[104],"suggest":[106],"questions":[108],"add":[110],"future":[112],"surveys":[113,128],"surfacing":[115],"salient":[116],"constructs":[117],"-":[118,125],"such":[119],"as":[120],"belonging":[121],"identity":[123,147],"fluidity":[124],"existing":[127,165,192],"do":[129,167],"not":[130,168],"capture.":[131],"Second,":[132],"reveal":[135],"heterogeneity":[136],"within":[137],"standardized":[138],"categories,":[139],"explaining":[140],"additional":[141],"variation":[142],"health,":[144],"well-being,":[145],"importance.":[148],"Third,":[149],"illuminate":[152],"discordance":[154],"between":[155],"self-identified":[156],"perceived":[158],"identities,":[159],"highlighting":[160],"mechanisms":[161],"misrecognition":[163],"measures":[166],"reflect.":[169],"More":[170],"broadly,":[171],"be":[175],"deployed":[176],"wide":[179],"range":[180],"settings":[183],"identify":[185],"free":[189],"text,":[190],"complementing":[191],"qualitative":[193]},"counts_by_year":[],"updated_date":"2026-04-08T06:01:36.053099","created_date":"2026-04-02T00:00:00"}
