{"id":"https://openalex.org/W4391464262","doi":"https://doi.org/10.1162/qss_a_00285","title":"Large-scale text analysis using generative language models: A case study in discovering public value expressions in AI patents","display_name":"Large-scale text analysis using generative language models: A case study in discovering public value expressions in AI patents","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4391464262","doi":"https://doi.org/10.1162/qss_a_00285"},"language":"en","primary_location":{"id":"doi:10.1162/qss_a_00285","is_oa":true,"landing_page_url":"https://doi.org/10.1162/qss_a_00285","pdf_url":"https://direct.mit.edu/qss/article-pdf/doi/10.1162/qss_a_00285/2325312/qss_a_00285.pdf","source":{"id":"https://openalex.org/S4210195326","display_name":"Quantitative Science Studies","issn_l":"2641-3337","issn":["2641-3337"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Quantitative Science Studies","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://direct.mit.edu/qss/article-pdf/doi/10.1162/qss_a_00285/2325312/qss_a_00285.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064583740","display_name":"Sergio Pelaez","orcid":"https://orcid.org/0000-0003-2640-5623"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sergio Pelaez","raw_affiliation_strings":["School of Public Policy, Georgia Institute of Technology, Atlanta, GA, USA"],"raw_orcid":"https://orcid.org/0000-0003-2640-5623","affiliations":[{"raw_affiliation_string":"School of Public Policy, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085250715","display_name":"Gaurav Verma","orcid":"https://orcid.org/0000-0001-6182-9857"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gaurav Verma","raw_affiliation_strings":["School of Computational Science and Engineering, College of Computing, Georgia Institute of Technology, Atlanta, GA, USA"],"raw_orcid":"https://orcid.org/0000-0001-6182-9857","affiliations":[{"raw_affiliation_string":"School of Computational Science and Engineering, College of Computing, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057244875","display_name":"B\u00e1rbara Ribeiro","orcid":"https://orcid.org/0000-0002-5230-1695"},"institutions":[{"id":"https://openalex.org/I181418319","display_name":"SKEMA Business School","ror":"https://ror.org/036h8vg94","country_code":"FR","type":"education","lineage":["https://openalex.org/I181418319"]},{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["FR","GB"],"is_corresponding":false,"raw_author_name":"Barbara Ribeiro","raw_affiliation_strings":["Manchester Institute of Innovation Research, University of Manchester, Manchester, UK","SKEMA Business School, Universit\u00e9 C\u00f4te d\u2019Azur, Campus Grand Paris, Paris, France"],"raw_orcid":"https://orcid.org/0000-0002-5230-1695","affiliations":[{"raw_affiliation_string":"Manchester Institute of Innovation Research, University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]},{"raw_affiliation_string":"SKEMA Business School, Universit\u00e9 C\u00f4te d\u2019Azur, Campus Grand Paris, Paris, France","institution_ids":["https://openalex.org/I181418319"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101685087","display_name":"Philip Shapira","orcid":"https://orcid.org/0000-0003-2488-5985"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]},{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Philip Shapira","raw_affiliation_strings":["Manchester Institute of Innovation Research, University of Manchester, Manchester, UK","School of Public Policy, Georgia Institute of Technology, Atlanta, GA, USA","Manchester Institute of Innovation Research, University of Manchester, Manchester, UK, and School of Public Policy Georgia Institute of Technology Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0003-2488-5985","affiliations":[{"raw_affiliation_string":"Manchester Institute of Innovation Research, University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]},{"raw_affiliation_string":"School of Public Policy, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Manchester Institute of Innovation Research, University of Manchester, Manchester, UK, and School of Public Policy Georgia Institute of Technology Atlanta, USA","institution_ids":["https://openalex.org/I130701444","https://openalex.org/I28407311"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5064583740"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":{"value":800,"currency":"USD","value_usd":800},"apc_paid":{"value":800,"currency":"USD","value_usd":800},"fwci":48.7593,"has_fulltext":true,"cited_by_count":27,"citation_normalized_percentile":{"value":0.99696173,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"5","issue":"1","first_page":"153","last_page":"169"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.8519999980926514,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.8519999980926514,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13851","display_name":"Law, AI, and Intellectual Property","score":0.8288999795913696,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.7844767570495605},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5896554589271545},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5698566436767578},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.5325701832771301},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.498410701751709},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4967832863330841},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42340680956840515},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3632647395133972},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.18140378594398499},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.12879735231399536},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.09840789437294006},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.06588149070739746}],"concepts":[{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.7844767570495605},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5896554589271545},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5698566436767578},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.5325701832771301},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.498410701751709},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4967832863330841},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42340680956840515},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3632647395133972},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.18140378594398499},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.12879735231399536},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.09840789437294006},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.06588149070739746}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/qss_a_00285","is_oa":true,"landing_page_url":"https://doi.org/10.1162/qss_a_00285","pdf_url":"https://direct.mit.edu/qss/article-pdf/doi/10.1162/qss_a_00285/2325312/qss_a_00285.pdf","source":{"id":"https://openalex.org/S4210195326","display_name":"Quantitative Science Studies","issn_l":"2641-3337","issn":["2641-3337"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Quantitative Science Studies","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:090e906392db460d98a2db9609f6661c","is_oa":true,"landing_page_url":"https://doaj.org/article/090e906392db460d98a2db9609f6661c","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Quantitative Science Studies, Vol 5, Iss 1 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/qss_a_00285","is_oa":true,"landing_page_url":"https://doi.org/10.1162/qss_a_00285","pdf_url":"https://direct.mit.edu/qss/article-pdf/doi/10.1162/qss_a_00285/2325312/qss_a_00285.pdf","source":{"id":"https://openalex.org/S4210195326","display_name":"Quantitative Science Studies","issn_l":"2641-3337","issn":["2641-3337"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Quantitative Science Studies","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6285227557","display_name":null,"funder_award_id":"BB/W013770/1","funder_id":"https://openalex.org/F4320334629","funder_display_name":"Biotechnology and Biological Sciences Research Council"},{"id":"https://openalex.org/G8582146330","display_name":null,"funder_award_id":"895-2018-1006","funder_id":"https://openalex.org/F4320334617","funder_display_name":"Social Sciences and Humanities Research Council of Canada"},{"id":"https://openalex.org/G8751587367","display_name":"21EBTA: EB-AI Consortium for Bioengineered Cells &amp; Systems (AI-4-EB)","funder_award_id":"BB/W013770/1","funder_id":"https://openalex.org/F4320334629","funder_display_name":"Biotechnology and Biological Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320318300","display_name":"Snap","ror":"https://ror.org/054nr9p58"},{"id":"https://openalex.org/F4320332167","display_name":"Directorate for Biological Sciences","ror":"https://ror.org/001xhss06"},{"id":"https://openalex.org/F4320334617","display_name":"Social Sciences and Humanities Research Council of Canada","ror":"https://ror.org/04j5jqy92"},{"id":"https://openalex.org/F4320334629","display_name":"Biotechnology and Biological Sciences Research Council","ror":"https://ror.org/00cwqg982"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4391464262.pdf"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W632139601","https://openalex.org/W1604621732","https://openalex.org/W1880262756","https://openalex.org/W1994775187","https://openalex.org/W2101105183","https://openalex.org/W2160792700","https://openalex.org/W2163436064","https://openalex.org/W2896457183","https://openalex.org/W2903272491","https://openalex.org/W2948740140","https://openalex.org/W2963456134","https://openalex.org/W2965373594","https://openalex.org/W2978017171","https://openalex.org/W2988293309","https://openalex.org/W2996428491","https://openalex.org/W3020436075","https://openalex.org/W3033187248","https://openalex.org/W3113751171","https://openalex.org/W3124680412","https://openalex.org/W3124761574","https://openalex.org/W3213382970","https://openalex.org/W4200271138","https://openalex.org/W4206133716","https://openalex.org/W4210764005","https://openalex.org/W4214604362","https://openalex.org/W4221143046","https://openalex.org/W4229005866","https://openalex.org/W4240387987","https://openalex.org/W4288083802","https://openalex.org/W4292402516","https://openalex.org/W4307079201","https://openalex.org/W4308506027","https://openalex.org/W4309224269","https://openalex.org/W4313421434","https://openalex.org/W4322718191","https://openalex.org/W4360836968","https://openalex.org/W4360891289","https://openalex.org/W4362597819","https://openalex.org/W6639619044"],"related_works":["https://openalex.org/W4365211920","https://openalex.org/W3014948380","https://openalex.org/W4380551139","https://openalex.org/W2280377497","https://openalex.org/W4387506531","https://openalex.org/W3174044702","https://openalex.org/W4238433571","https://openalex.org/W2967848559","https://openalex.org/W4283803360","https://openalex.org/W4317695495"],"abstract_inverted_index":{"Abstract":[0],"We":[1,84,158],"put":[2],"forward":[3],"a":[4,8,55],"novel":[5],"approach":[6,23,164],"using":[7,93],"generative":[9,186],"language":[10,187],"model":[11],"(GPT-4)":[12],"to":[13,26,123,134],"produce":[14],"labels":[15,87,131,195],"and":[16,49,61,79,88,96,105,138,153,172,179,196],"rationales":[17,80,89],"for":[18,38,59,81,149,165],"large-scale":[19,167],"text":[20,34,82,168],"analysis.":[21],"The":[22,129],"is":[24,72],"used":[25,133],"discover":[27,124],"public":[28,63,113,126],"value":[29,64,114,127],"expressions":[30,65,115],"in":[31,66,193],"patents.":[32],"Using":[33],"(5.4":[35],"million":[36],"sentences)":[37],"154,934":[39],"US":[40],"AI":[41],"patent":[42],"documents":[43],"from":[44,116],"the":[45,86,142,150,160],"United":[46],"States":[47],"Patent":[48],"Trademark":[50],"Office":[51],"(USPTO),":[52],"we":[53,183],"design":[54,178],"semi-automated,":[56],"human-supervised":[57],"framework":[58,177],"identifying":[60],"labeling":[62],"these":[67],"sentences.":[68],"A":[69],"GPT-4":[70,92,107],"prompt":[71],"developed":[73],"that":[74,100,185],"includes":[75],"definitions,":[76],"guidelines,":[77],"examples,":[78],"classification.":[83],"evaluate":[85],"produced":[90],"by":[91],"BLEU":[94],"scores":[95,148],"topic":[97],"modeling,":[98],"finding":[99],"they":[101],"are":[102,132],"accurate,":[103],"diverse,":[104],"faithful.":[106],"achieved":[108],"an":[109],"advanced":[110],"recognition":[111],"of":[112,162],"our":[117,163],"framework,":[118],"which":[119],"it":[120],"also":[121],"uses":[122],"unseen":[125],"expressions.":[128],"GPT-produced":[130],"train":[135],"BERT-based":[136],"classifiers":[137],"predict":[139],"sentences":[140],"on":[141],"entire":[143],"database,":[144],"achieving":[145],"high":[146],"F1":[147],"3-class":[151],"(0.85)":[152],"2-class":[154],"classification":[155],"(0.91)":[156],"tasks.":[157],"discuss":[159],"implications":[161],"conducting":[166],"analyses":[169],"with":[170],"complex":[171],"abstract":[173],"concepts.":[174],"With":[175],"careful":[176],"interactive":[180],"human":[181],"oversight,":[182],"suggest":[184],"models":[188],"can":[189],"offer":[190],"significant":[191],"assistance":[192],"producing":[194],"rationales.":[197]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":17},{"year":2024,"cited_by_count":5}],"updated_date":"2026-01-21T23:30:37.877113","created_date":"2025-10-10T00:00:00"}
