{"id":"https://openalex.org/W7139948344","doi":"https://doi.org/10.48550/arxiv.2603.18393","title":"Where are the Hidden Gems? Applying Transformer Models for Design Discussion Detection","display_name":"Where are the Hidden Gems? Applying Transformer Models for Design Discussion Detection","publication_year":2026,"publication_date":"2026-03-19","ids":{"openalex":"https://openalex.org/W7139948344","doi":"https://doi.org/10.48550/arxiv.2603.18393"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.18393","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18393","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.18393","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115465491","display_name":"Lawrence Arkoh","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Arkoh, Lawrence","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008359300","display_name":"Daniel Feitosa","orcid":"https://orcid.org/0000-0001-9371-232X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feitosa, Daniel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5121126913","display_name":"Wesley K. G. Assun\u00e7\u00e3o","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Assun\u00e7\u00e3o, Wesley K. G.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5115465491"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.7275000214576721,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.7275000214576721,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.07050000131130219,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.03229999914765358,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/commit","display_name":"Commit","score":0.6506999731063843},{"id":"https://openalex.org/keywords/code-refactoring","display_name":"Code refactoring","score":0.6123999953269958},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5220000147819519},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.48069998621940613},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.427700012922287},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.4172999858856201},{"id":"https://openalex.org/keywords/conceptual-design","display_name":"Conceptual design","score":0.3846000134944916},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.36059999465942383}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6850000023841858},{"id":"https://openalex.org/C153180980","wikidata":"https://www.wikidata.org/wiki/Q19776675","display_name":"Commit","level":2,"score":0.6506999731063843},{"id":"https://openalex.org/C152752567","wikidata":"https://www.wikidata.org/wiki/Q116877","display_name":"Code refactoring","level":3,"score":0.6123999953269958},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5220000147819519},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5171999931335449},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.48069998621940613},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4438000023365021},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.427700012922287},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.4172999858856201},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.4059000015258789},{"id":"https://openalex.org/C120208923","wikidata":"https://www.wikidata.org/wiki/Q5158435","display_name":"Conceptual design","level":2,"score":0.3846000134944916},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.36059999465942383},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.35929998755455017},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.35749998688697815},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.35530000925064087},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.35199999809265137},{"id":"https://openalex.org/C52913732","wikidata":"https://www.wikidata.org/wiki/Q857102","display_name":"Software design","level":4,"score":0.31700000166893005},{"id":"https://openalex.org/C139143892","wikidata":"https://www.wikidata.org/wiki/Q7441615","display_name":"Search-based software engineering","level":5,"score":0.29030001163482666},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.29010000824928284},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2833000123500824},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C12590798","wikidata":"https://www.wikidata.org/wiki/Q3933199","display_name":"Replication (statistics)","level":2,"score":0.2736999988555908},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2689000070095062},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.26109999418258667}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.18393","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18393","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.18393","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18393","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5643386244773865,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Design":[0,23],"decisions":[1,50],"are":[2],"at":[3],"the":[4,61,67,112,190,233],"core":[5],"of":[6,63,70,106,114,136,237],"software":[7,39],"engineering":[8],"and":[9,20,38,66,84,122,147,161,170,173,193,235],"appear":[10],"in":[11,51,101,151],"Q\\&amp;A":[12],"forums,":[13],"mailing":[14],"lists,":[15],"pull":[16,167],"requests,":[17,168],"issue":[18],"trackers,":[19],"commit":[21],"messages.":[22],"discussions":[24],"spanning":[25],"a":[26,133,200],"project's":[27],"history":[28],"provide":[29],"valuable":[30],"information":[31],"for":[32,124,215,242],"informed":[33],"decision-making,":[34],"such":[35],"as":[36],"refactoring":[37],"modernization.":[40],"Machine":[41],"learning":[42],"techniques":[43],"have":[44],"been":[45],"used":[46],"to":[47,110],"detect":[48],"design":[49,244],"natural":[52],"language":[53,240],"discussions;":[54],"however,":[55],"their":[56,89],"effectiveness":[57],"is":[58,109],"limited":[59],"by":[60],"scarcity":[62],"labeled":[64],"data":[65,216],"high":[68],"cost":[69],"annotation.":[71],"Prior":[72],"work":[73,108],"adopted":[74],"cross-domain":[75,138],"strategies":[76],"with":[77,143,203],"traditional":[78,96],"classifiers,":[79],"training":[80],"on":[81,86,158,163],"one":[82],"domain":[83],"testing":[85],"another.":[87],"Despite":[88],"success,":[90],"transformer-based":[91,115],"models,":[92],"which":[93],"often":[94],"outperform":[95],"methods,":[97],"remain":[98],"largely":[99],"unexplored":[100],"this":[102,107,129],"setting.":[103],"The":[104,154],"goal":[105],"investigate":[111],"performance":[113],"models":[116,155,241],"(i.e.,":[117,166],"BERT,":[118],"RoBERTa,":[119],"XLNet,":[120],"LaMini-Flan-T5-77M,":[121],"ChatGPT-4o-mini)":[123],"detecting":[125,243],"design-related":[126],"discussions.":[127],"To":[128],"end,":[130],"we":[131],"conduct":[132],"conceptual":[134],"replication":[135],"prior":[137,220],"studies":[139],"while":[140,180],"extending":[141],"them":[142],"modern":[144,239],"transformer":[145],"architectures":[146],"addressing":[148],"methodological":[149],"issues":[150],"earlier":[152],"work.":[153],"were":[156],"fine-tuned":[157],"Stack":[159],"Overflow":[160],"evaluated":[162,212],"GitHub":[164],"artifacts":[165],"issues,":[169],"commits).":[171],"BERT":[172],"RoBERTa":[174],"show":[175],"strong":[176],"recall":[177,192],"across":[178],"domains,":[179],"XLNet":[181],"achieves":[182],"higher":[183],"precision":[184,205],"but":[185,206,218],"lower":[186],"recall.":[187],"ChatGPT-4o-mini":[188],"yields":[189],"highest":[191],"competitive":[194],"overall":[195],"performance,":[196],"whereas":[197],"LaMini-Flan-T5-77M":[198],"provides":[199],"lightweight":[201],"alternative":[202],"stronger":[204],"less":[207],"balanced":[208],"performance.":[209],"We":[210],"also":[211],"similar-word":[213],"injection":[214],"augmentation,":[217],"unlike":[219],"findings,":[221],"it":[222],"did":[223],"not":[224],"yield":[225],"meaningful":[226],"improvements.":[227],"Overall,":[228],"these":[229],"results":[230],"highlight":[231],"both":[232],"opportunities":[234],"trade-offs":[236],"using":[238],"discussion.":[245]},"counts_by_year":[],"updated_date":"2026-03-21T06:36:02.116451","created_date":"2026-03-21T00:00:00"}
