{"id":"https://openalex.org/W4390493562","doi":"https://doi.org/10.1109/snams60348.2023.10375400","title":"Semantic Compression with Large Language Models","display_name":"Semantic Compression with Large Language Models","publication_year":2023,"publication_date":"2023-11-21","ids":{"openalex":"https://openalex.org/W4390493562","doi":"https://doi.org/10.1109/snams60348.2023.10375400"},"language":"en","primary_location":{"id":"doi:10.1109/snams60348.2023.10375400","is_oa":false,"landing_page_url":"https://doi.org/10.1109/snams60348.2023.10375400","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 Tenth International Conference on Social Networks Analysis, Management and Security (SNAMS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045719511","display_name":"Henry Gilbert","orcid":null},"institutions":[{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Henry Gilbert","raw_affiliation_strings":["Vanderbilt University,Dept. of Computer Science,Nashville,TN,USA","Dept. of Computer Science, Vanderbilt University, Nashville, TN, USA"],"affiliations":[{"raw_affiliation_string":"Vanderbilt University,Dept. of Computer Science,Nashville,TN,USA","institution_ids":["https://openalex.org/I200719446"]},{"raw_affiliation_string":"Dept. of Computer Science, Vanderbilt University, Nashville, TN, USA","institution_ids":["https://openalex.org/I200719446"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042041372","display_name":"Michael Sandborn","orcid":"https://orcid.org/0000-0001-8592-6758"},"institutions":[{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Sandborn","raw_affiliation_strings":["Vanderbilt University,Dept. of Computer Science,Nashville,TN,USA","Dept. of Computer Science, Vanderbilt University, Nashville, TN, USA"],"affiliations":[{"raw_affiliation_string":"Vanderbilt University,Dept. of Computer Science,Nashville,TN,USA","institution_ids":["https://openalex.org/I200719446"]},{"raw_affiliation_string":"Dept. of Computer Science, Vanderbilt University, Nashville, TN, USA","institution_ids":["https://openalex.org/I200719446"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082548649","display_name":"Douglas C. Schmidt","orcid":"https://orcid.org/0000-0002-7389-4995"},"institutions":[{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Douglas C. Schmidt","raw_affiliation_strings":["Vanderbilt University,Dept. of Computer Science,Nashville,TN,USA","Dept. of Computer Science, Vanderbilt University, Nashville, TN, USA"],"affiliations":[{"raw_affiliation_string":"Vanderbilt University,Dept. of Computer Science,Nashville,TN,USA","institution_ids":["https://openalex.org/I200719446"]},{"raw_affiliation_string":"Dept. of Computer Science, Vanderbilt University, Nashville, TN, USA","institution_ids":["https://openalex.org/I200719446"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002878202","display_name":"Jesse Spencer-Smith","orcid":"https://orcid.org/0000-0001-8165-6658"},"institutions":[{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jesse Spencer-Smith","raw_affiliation_strings":["Vanderbilt University,Dept. of Computer Science,Nashville,TN,USA","Dept. of Computer Science, Vanderbilt University, Nashville, TN, USA"],"affiliations":[{"raw_affiliation_string":"Vanderbilt University,Dept. of Computer Science,Nashville,TN,USA","institution_ids":["https://openalex.org/I200719446"]},{"raw_affiliation_string":"Dept. of Computer Science, Vanderbilt University, Nashville, TN, USA","institution_ids":["https://openalex.org/I200719446"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023026501","display_name":"Jules White","orcid":"https://orcid.org/0000-0002-6331-2365"},"institutions":[{"id":"https://openalex.org/I200719446","display_name":"Vanderbilt University","ror":"https://ror.org/02vm5rt34","country_code":"US","type":"education","lineage":["https://openalex.org/I200719446"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jules White","raw_affiliation_strings":["Vanderbilt University,Dept. of Computer Science,Nashville,TN,USA","Dept. of Computer Science, Vanderbilt University, Nashville, TN, USA"],"affiliations":[{"raw_affiliation_string":"Vanderbilt University,Dept. of Computer Science,Nashville,TN,USA","institution_ids":["https://openalex.org/I200719446"]},{"raw_affiliation_string":"Dept. of Computer Science, Vanderbilt University, Nashville, TN, USA","institution_ids":["https://openalex.org/I200719446"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5045719511"],"corresponding_institution_ids":["https://openalex.org/I200719446"],"apc_list":null,"apc_paid":null,"fwci":6.1614,"has_fulltext":false,"cited_by_count":35,"citation_normalized_percentile":{"value":0.97250825,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9667999744415283,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.9560999870300293,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7433677911758423},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.6427505016326904},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6378024220466614},{"id":"https://openalex.org/keywords/lossy-compression","display_name":"Lossy compression","score":0.6166738867759705},{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.5658838152885437},{"id":"https://openalex.org/keywords/semantic-compression","display_name":"Semantic compression","score":0.4967198967933655},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4469192326068878},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4454624056816101},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.44144654273986816},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.42109864950180054},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.42090803384780884},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3607918620109558},{"id":"https://openalex.org/keywords/semantic-computing","display_name":"Semantic computing","score":0.2509852349758148},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.1849166750907898},{"id":"https://openalex.org/keywords/semantic-technology","display_name":"Semantic technology","score":0.13782697916030884},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1148141622543335}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7433677911758423},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.6427505016326904},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6378024220466614},{"id":"https://openalex.org/C165021410","wikidata":"https://www.wikidata.org/wiki/Q55564","display_name":"Lossy compression","level":2,"score":0.6166738867759705},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.5658838152885437},{"id":"https://openalex.org/C202708506","wikidata":"https://www.wikidata.org/wiki/Q7449050","display_name":"Semantic compression","level":5,"score":0.4967198967933655},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4469192326068878},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4454624056816101},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44144654273986816},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42109864950180054},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.42090803384780884},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3607918620109558},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.2509852349758148},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.1849166750907898},{"id":"https://openalex.org/C6881194","wikidata":"https://www.wikidata.org/wiki/Q7449091","display_name":"Semantic technology","level":4,"score":0.13782697916030884},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1148141622543335}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/snams60348.2023.10375400","is_oa":false,"landing_page_url":"https://doi.org/10.1109/snams60348.2023.10375400","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 Tenth International Conference on Social Networks Analysis, Management and Security (SNAMS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.699999988079071,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1614298861","https://openalex.org/W2950568044","https://openalex.org/W2964150020","https://openalex.org/W3177450194","https://openalex.org/W3185341429","https://openalex.org/W3195577433","https://openalex.org/W4312900176","https://openalex.org/W4321649710","https://openalex.org/W4360836968","https://openalex.org/W4367018374","https://openalex.org/W4378189609","https://openalex.org/W4392669753","https://openalex.org/W6800751262","https://openalex.org/W6849479259"],"related_works":["https://openalex.org/W2385628723","https://openalex.org/W2547124190","https://openalex.org/W3180760233","https://openalex.org/W1970394887","https://openalex.org/W3035703949","https://openalex.org/W4247601675","https://openalex.org/W2118338613","https://openalex.org/W755971114","https://openalex.org/W1982468865","https://openalex.org/W4210455546"],"abstract_inverted_index":{"The":[0],"rise":[1],"of":[2,40,67,76,109,135,156,178,208],"large":[3,62],"language":[4],"models":[5],"(LLMs)":[6],"is":[7,78,114],"revolutionizing":[8],"information":[9,26],"retrieval,":[10],"question":[11],"answering,":[12],"summarization,":[13],"and":[14,42,144,152,170,184,200],"code":[15],"generation":[16],"tasks.":[17],"However,":[18],"in":[19,85],"addition":[20],"to":[21,72,94,121,158,215],"confidently":[22],"presenting":[23],"factually":[24],"inaccurate":[25],"at":[27,49],"times":[28],"(known":[29],"as":[30,103,105],"\u201challucinations\u201d),":[31],"LLMs":[32,157,188],"are":[33],"also":[34],"inherently":[35],"limited":[36],"by":[37,186],"the":[38,74,100,128,133,154,176,187,205,209],"number":[39],"input":[41],"output":[43],"tokens":[44,218],"that":[45,58,195],"can":[46,197],"be":[47,91],"processed":[48],"once,":[50],"making":[51],"them":[52],"potentially":[53],"less":[54],"effective":[55],"on":[56,123,142],"tasks":[57],"require":[59],"processing":[60],"a":[61,106,213],"set":[63],"or":[64,81,112],"continuous":[65],"stream":[66],"information.":[68],"A":[69],"common":[70],"approach":[71],"reducing":[73],"size":[75],"data":[77],"through":[79],"lossless":[80],"lossy":[82],"compression.":[83],"Yet,":[84],"some":[86],"cases":[87],"it":[88],"may":[89],"not":[90],"strictly":[92],"necessary":[93],"perfectly":[95],"recover":[96],"every":[97],"detail":[98],"from":[99,130],"original":[101,210],"data,":[102],"long":[104],"requisite":[107],"level":[108,177],"semantic":[110,206],"precision":[111],"intent":[113,180],"conveyed.":[115],"This":[116],"paper":[117],"presents":[118],"three":[119],"contributions":[120],"research":[122],"LLMs.":[124],"First,":[125],"we":[126,150,162,189],"present":[127,163],"results":[129,193],"experiments":[131],"exploring":[132],"viability":[134],"\u201capproximate":[136],"compression\u201d":[137],"using":[138],"LLMs,":[139],"focusing":[140],"specifically":[141],"GPT-3.5":[143],"GPT-4":[145,196],"via":[146],"ChatGPT":[147],"interfaces.":[148],"Second,":[149],"investigate":[151],"quantify":[153,175],"capability":[155],"compress":[159,199],"text.":[160],"Third,":[161],"two":[164],"novel":[165],"metrics-Exact":[166],"Reconstructive":[167],"Effectiveness":[168,173],"(ERE)":[169],"Semantic":[171],"Reconstruction":[172],"(SRE)-that":[174],"preserved":[179],"between":[181],"text":[182,202],"compressed":[183],"decompressed":[185],"studied.":[190],"Our":[191],"initial":[192],"indicate":[194],"effectively":[198],"reconstruct":[201],"while":[203],"preserving":[204],"essence":[207],"text,":[211],"providing":[212],"path":[214],"leverage":[216],"more":[217],"than":[219],"current":[220],"limits":[221],"allow.":[222]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
