{"id":"https://openalex.org/W7154270665","doi":"https://doi.org/10.48550/arxiv.2604.10834","title":"LLMs for Qualitative Data Analysis Fail on Security-specificComments in Human Experiments","display_name":"LLMs for Qualitative Data Analysis Fail on Security-specificComments in Human Experiments","publication_year":2026,"publication_date":"2026-04-12","ids":{"openalex":"https://openalex.org/W7154270665","doi":"https://doi.org/10.48550/arxiv.2604.10834"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10834","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10834","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10834","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133561187","display_name":"Maria Camporese","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Camporese, Maria","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085639552","display_name":"Fabio Massacci","orcid":"https://orcid.org/0000-0002-1091-8486"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Massacci, Fabio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133557042","display_name":"Yuanjun Gong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gong, Yuanjun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.18050000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.18050000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.07020000368356705,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0697999969124794,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.6403999924659729},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.5705000162124634},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.5544000267982483},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.45879998803138733},{"id":"https://openalex.org/keywords/qualitative-research","display_name":"Qualitative research","score":0.41760000586509705},{"id":"https://openalex.org/keywords/thematic-analysis","display_name":"Thematic analysis","score":0.3937999904155731},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.38199999928474426},{"id":"https://openalex.org/keywords/narrative","display_name":"Narrative","score":0.3366999924182892}],"concepts":[{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.6403999924659729},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6347000002861023},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.5705000162124634},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5544000267982483},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.45879998803138733},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4510999917984009},{"id":"https://openalex.org/C190248442","wikidata":"https://www.wikidata.org/wiki/Q839486","display_name":"Qualitative research","level":2,"score":0.41760000586509705},{"id":"https://openalex.org/C74196892","wikidata":"https://www.wikidata.org/wiki/Q7781188","display_name":"Thematic analysis","level":3,"score":0.3937999904155731},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.38199999928474426},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3684999942779541},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.3366999924182892},{"id":"https://openalex.org/C87156501","wikidata":"https://www.wikidata.org/wiki/Q7268708","display_name":"Qualitative property","level":2,"score":0.3237000107765198},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3176000118255615},{"id":"https://openalex.org/C3018587665","wikidata":"https://www.wikidata.org/wiki/Q7268696","display_name":"Qualitative analysis","level":3,"score":0.31439998745918274},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29510000348091125},{"id":"https://openalex.org/C71745522","wikidata":"https://www.wikidata.org/wiki/Q2476929","display_name":"Confidentiality","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C119839945","wikidata":"https://www.wikidata.org/wiki/Q6545185","display_name":"Unique identifier","level":3,"score":0.28040000796318054},{"id":"https://openalex.org/C169806903","wikidata":"https://www.wikidata.org/wiki/Q5937752","display_name":"Human error","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.275299996137619},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2718999981880188},{"id":"https://openalex.org/C22680326","wikidata":"https://www.wikidata.org/wiki/Q7444867","display_name":"Secure coding","level":5,"score":0.2648000121116638},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2635999917984009},{"id":"https://openalex.org/C169437150","wikidata":"https://www.wikidata.org/wiki/Q8458","display_name":"Human rights","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10834","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10834","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10834","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10834","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"[Background:]":[0],"Thematic":[1],"analysis":[2],"of":[3],"free-text":[4,85],"justifications":[5],"in":[6,84],"human":[7,33,69,88,98,150],"experiments":[8],"provides":[9],"significant":[10],"qualitative":[11],"insights.":[12],"Yet,":[13],"it":[14],"is":[15],"costly":[16],"because":[17],"reliable":[18],"annotations":[19],"require":[20,48],"multiple":[21],"domain":[22],"experts.":[23],"Large":[24],"language":[25],"models":[26],"(LLMs)":[27],"seem":[28],"ideal":[29],"candidates":[30],"to":[31,79,97,146],"replace":[32,148],"annotators.":[34],"[Problem:]":[35],"Coding":[36],"security-specific":[37],"aspects":[38],"(code":[39],"identifiers":[40],"mentioned,":[41,43],"lines-of-code":[42],"security":[44,66],"keywords":[45],"mentioned)":[46],"may":[47],"deeper":[49],"contextual":[50],"understanding":[51],"than":[52],"sentiment":[53],"classification.":[54],"[Objective:]":[55],"Explore":[56],"whether":[57],"LLMs":[58,76,157],"can":[59],"act":[60],"as":[61],"automated":[62],"annotators":[63,99],"for":[64],"technical":[65],"comments":[67,86],"by":[68,87],"subjects.":[70],"[Method:]":[71],"We":[72,105,125],"prompt":[73],"four":[74],"top-performing":[75],"on":[77],"LiveBench":[78],"detect":[80],"nine":[81],"security-relevant":[82],"codes":[83,142],"subjects":[89],"analyzing":[90],"vulnerable":[91],"code":[92,133],"snippets.":[93],"Outputs":[94],"are":[95,138,144,161],"compared":[96],"using":[100,131],"Cohen's":[101],"Kappa":[102],"(chance-corrected":[103],"accuracy).":[104],"test":[106],"different":[107],"prompts":[108],"mimicking":[109],"annotation":[110,159],"best":[111],"practices,":[112],"including":[113],"emerging":[114],"codes,":[115],"detailed":[116,132],"codebooks":[117],"with":[118,155],"examples,":[119],"and":[120,143,158],"conflicting":[121],"examples.":[122],"[Negative":[123],"Results:]":[124],"observed":[126],"marked":[127],"improvements":[128,137],"only":[129],"when":[130],"descriptions;":[134],"however,":[135],"these":[136],"not":[139],"uniform":[140],"across":[141],"insufficient":[145],"reliably":[147],"a":[149],"annotator.":[151],"[Limitations:]":[152],"Additional":[153],"studies":[154],"more":[156],"tasks":[160],"needed.":[162]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-15T00:00:00"}
