{"id":"https://openalex.org/W4406458887","doi":"https://doi.org/10.1109/bigdata62323.2024.10825591","title":"Model Selection for HERITAGE-AI: Evaluating LLMs for Contextual Data Analysis of Maryland\u2019s Domestic Traffic Ads (1824\u20131864)","display_name":"Model Selection for HERITAGE-AI: Evaluating LLMs for Contextual Data Analysis of Maryland\u2019s Domestic Traffic Ads (1824\u20131864)","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406458887","doi":"https://doi.org/10.1109/bigdata62323.2024.10825591"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825591","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825591","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030197011","display_name":"Rajesh Kumar Gnanasekaran","orcid":"https://orcid.org/0000-0003-1775-2455"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Rajesh Kumar Gnanasekaran","raw_affiliation_strings":["University of Maryland,College of Information,College Park,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,College of Information,College Park,USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030817406","display_name":"Lori Perine","orcid":null},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lori Perine","raw_affiliation_strings":["University of Maryland,College of Information,College Park,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,College of Information,College Park,USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073880360","display_name":"Mark F. Conrad","orcid":"https://orcid.org/0000-0001-6625-8889"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mark Conrad","raw_affiliation_strings":["University of Maryland,College of Information,College Park,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,College of Information,College Park,USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040009333","display_name":"Richard Marciano","orcid":"https://orcid.org/0000-0002-6813-5046"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard Marciano","raw_affiliation_strings":["University of Maryland,College of Information,College Park,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,College of Information,College Park,USA","institution_ids":["https://openalex.org/I66946132"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5030197011"],"corresponding_institution_ids":["https://openalex.org/I66946132"],"apc_list":null,"apc_paid":null,"fwci":0.2632,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.58810194,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"2419","last_page":"2430"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9327999949455261,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9179999828338623,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6333626508712769},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5413889288902283},{"id":"https://openalex.org/keywords/transport-engineering","display_name":"Transport engineering","score":0.35085242986679077},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.32585400342941284},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3231314420700073},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24238580465316772},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.2256428301334381}],"concepts":[{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6333626508712769},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5413889288902283},{"id":"https://openalex.org/C22212356","wikidata":"https://www.wikidata.org/wiki/Q775325","display_name":"Transport engineering","level":1,"score":0.35085242986679077},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.32585400342941284},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3231314420700073},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24238580465316772},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2256428301334381}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825591","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825591","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11","score":0.6899999976158142}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306122","display_name":"Institute of Museum and Library Services","ror":"https://ror.org/030prv062"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W3165489349","https://openalex.org/W3208932992","https://openalex.org/W4205536814","https://openalex.org/W4391136507","https://openalex.org/W4399496842","https://openalex.org/W4401159818","https://openalex.org/W4402197278","https://openalex.org/W4403868252","https://openalex.org/W4405621253","https://openalex.org/W6795310648","https://openalex.org/W6866167334"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0,136],"HERITAGE-AI":[1],"(Harnessing":[2],"Enhanced":[3],"Research":[4],"and":[5,63,85,119,156,175,177,190,203,222,250,269,288,307,320],"Instructional":[6],"Technologies":[7],"for":[8,51,128,167,225,264,300,328],"Archival":[9],"Generative":[10,129],"Exploration":[11],"using":[12,29],"AI),":[13],"as":[14,259],"part":[15],"of":[16,34,38,94,132,149,194,209,243,303],"the":[17,35,44,53,99,124,133,146,150,216,231,235,240,260,304,310,329],"IMLS":[18],"grant":[19],"initiative,":[20],"GenAI-4-Archive,":[21],"aims":[22],"to":[23,97,102,122,139,171,279],"analyze":[24],"sensitive":[25,79,147,227],"historical":[26,71,80,228],"datasets":[27,81],"ethically":[28],"advanced":[30],"AI":[31],"technologies.":[32],"One":[33],"key":[36],"tasks":[37],"this":[39,168,265],"project":[40],"focuses":[41],"on":[42],"selecting":[43],"most":[45,125,261],"suitable":[46,126,262],"Large":[47],"Language":[48],"Model":[49],"(LLM)":[50],"analyzing":[52,226],"Domestic":[54],"Traffic":[55],"Ads":[56],"(DTA)":[57],"published":[58],"in":[59,70,214,290,323],"Maryland":[60],"between":[61],"1824":[62],"1864":[64],"by":[65,238],"slave":[66],"traders\u2014a":[67],"dataset":[68,306,331],"rich":[69],"significance":[72],"yet":[73],"fraught":[74],"with":[75,309],"ethical":[76,84,283,312],"considerations.":[77],"Analyzing":[78],"presents":[82,90],"unique":[83],"technical":[86],"challenges.":[87],"This":[88],"paper":[89],"a":[91,182,291],"comparative":[92],"evaluation":[93,161],"leading":[95],"LLMs":[96],"identify":[98,123],"optimal":[100],"model":[101,127],"meet":[103],"HERITAGE-AI\u2019s":[104],"objectives.":[105],"We":[106],"survey":[107],"contemporary":[108],"models,":[109],"including":[110],"OpenAI\u2019s":[111,245],"GPT-4o,":[112,246],"Anthropic\u2019s":[113],"Claude":[114,270],"Sonnet,":[115],"Meta\u2019s":[116],"Llama":[117],"3.2,":[118],"Google\u2019s":[120],"Gemini,":[121],"AI-based":[130],"analysis":[131,180],"DTA":[134,232,305,330],"dataset.":[135,233],"objective":[137],"is":[138],"select":[140],"an":[141],"LLM":[142],"that":[143,200],"can":[144],"handle":[145],"nature":[148],"data":[151,229],"responsibly":[152],"while":[153,201],"providing":[154,215],"accurate":[155],"insightful":[157],"analysis.":[158,332],"Three":[159],"critical":[160],"criteria,":[162],"among":[163],"others,":[164],"are":[165],"established":[166],"reason:":[169],"Sensitivity":[170],"Historical":[172],"Context,":[173],"Privacy":[174],"Security,":[176],"Customizability.":[178],"Our":[179,197],"follows":[181],"three-step":[183],"approach:":[184],"evaluating":[185],"free":[186,202],"versions,":[187,189],"paid":[188,204],"enterprise-grade":[191],"cloud-based":[192,241,292],"implementations":[193,242],"these":[195],"LLMs.":[196],"findings":[198],"reveal":[199],"versions":[205],"offer":[206],"varying":[207],"degrees":[208],"accessibility,":[210],"they":[211],"fall":[212],"short":[213],"necessary":[217],"privacy,":[218],"security,":[219],"multi-user":[220],"access,":[221],"customization":[223],"required":[224],"like":[230],"In":[234],"third":[236],"step,":[237],"comparing":[239],"Azure":[244,255,325],"AWS":[247,251],"Bedrock\u2019s":[248,252],"Claude,":[249],"Llama3.2":[253],"LLMs,":[254],"openAI":[256],"GPT-4o":[257,268,327],"emerges":[258],"option":[263],"project.":[266],"Although":[267],"were":[271],"close":[272],"contenders,":[273],"Gpt-4o":[274],"demonstrated":[275],"robust":[276,285],"mechanisms":[277],"due":[278],"its":[280],"high":[281],"accuracy,":[282],"sensitivity,":[284],"privacy":[286],"controls,":[287],"scalability":[289],"environment.":[293],"It":[294],"also":[295],"offers":[296],"extensive":[297],"customizability,":[298],"allowing":[299],"effective":[301],"integration":[302],"alignment":[308],"project\u2019s":[311],"standards.":[313],"Future":[314],"work":[315],"will":[316],"involve":[317],"domain":[318],"experts":[319],"community":[321],"members":[322],"implementing":[324],"OpenAI":[326]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
