{"id":"https://openalex.org/W4392406089","doi":"https://doi.org/10.1109/access.2024.3373470","title":"Feature-Based Text Search Engine Mitigating Data Diversity Problem Using Pre-Trained Large Language Model for Fast Deployment Services","display_name":"Feature-Based Text Search Engine Mitigating Data Diversity Problem Using Pre-Trained Large Language Model for Fast Deployment Services","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4392406089","doi":"https://doi.org/10.1109/access.2024.3373470"},"language":"en","primary_location":{"id":"doi:10.1109/access.2024.3373470","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3373470","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10459082.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10459082.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053935099","display_name":"Y. Jeong","orcid":"https://orcid.org/0009-0006-5746-6380"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yongwoo Jeong","raw_affiliation_strings":["Rowan Inc., Seoul, Republic of Korea","Rowan, Inc, Seoul, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0006-5746-6380","affiliations":[{"raw_affiliation_string":"Rowan Inc., Seoul, Republic of Korea","institution_ids":[]},{"raw_affiliation_string":"Rowan, Inc, Seoul, Republic of Korea","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102941457","display_name":"Jiseon Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiseon Yang","raw_affiliation_strings":["Rowan Inc., Seoul, Republic of Korea","Rowan, Inc, Seoul, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0001-0694-4833","affiliations":[{"raw_affiliation_string":"Rowan Inc., Seoul, Republic of Korea","institution_ids":[]},{"raw_affiliation_string":"Rowan, Inc, Seoul, Republic of Korea","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091889621","display_name":"In Ho Choi","orcid":"https://orcid.org/0000-0003-3875-3529"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"In Ho Choi","raw_affiliation_strings":["Rowan Inc., Seoul, Republic of Korea","Rowan, Inc, Seoul, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rowan Inc., Seoul, Republic of Korea","institution_ids":[]},{"raw_affiliation_string":"Rowan, Inc, Seoul, Republic of Korea","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059174248","display_name":"Ju\u2010Yeon Lee","orcid":"https://orcid.org/0000-0002-0176-2184"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Juyeon Lee","raw_affiliation_strings":["Rowan Inc., Seoul, Republic of Korea","Rowan, Inc, Seoul, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rowan Inc., Seoul, Republic of Korea","institution_ids":[]},{"raw_affiliation_string":"Rowan, Inc, Seoul, Republic of Korea","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5053935099"],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.9762,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.78022212,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"12","issue":null,"first_page":"48145","last_page":"48157"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.8234670758247375},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7991165518760681},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6270096302032471},{"id":"https://openalex.org/keywords/diversity","display_name":"Diversity (politics)","score":0.5898780822753906},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.5306885242462158},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3878556191921234},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.38086578249931335},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3420674204826355},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3262888789176941},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1952577531337738}],"concepts":[{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.8234670758247375},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7991165518760681},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6270096302032471},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.5898780822753906},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.5306885242462158},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3878556191921234},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.38086578249931335},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3420674204826355},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3262888789176941},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1952577531337738},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2024.3373470","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3373470","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10459082.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:5d4fd406345c44cbb5dda891c9f791b5","is_oa":true,"landing_page_url":"https://doaj.org/article/5d4fd406345c44cbb5dda891c9f791b5","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 12, Pp 48145-48157 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2024.3373470","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3373470","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10459082.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4392406089.pdf","grobid_xml":"https://content.openalex.org/works/W4392406089.grobid-xml"},"referenced_works_count":47,"referenced_works":["https://openalex.org/W1821462560","https://openalex.org/W1965555277","https://openalex.org/W1967082914","https://openalex.org/W1981257123","https://openalex.org/W1987971958","https://openalex.org/W2037866349","https://openalex.org/W2043909051","https://openalex.org/W2051224630","https://openalex.org/W2055527244","https://openalex.org/W2087006489","https://openalex.org/W2087082746","https://openalex.org/W2088381487","https://openalex.org/W2107310198","https://openalex.org/W2111298664","https://openalex.org/W2112006025","https://openalex.org/W2125559251","https://openalex.org/W2147152072","https://openalex.org/W2147717514","https://openalex.org/W2150665743","https://openalex.org/W2163969215","https://openalex.org/W2187089797","https://openalex.org/W2296073425","https://openalex.org/W2320638839","https://openalex.org/W2574239693","https://openalex.org/W2577597748","https://openalex.org/W2739853341","https://openalex.org/W2889326796","https://openalex.org/W2896457183","https://openalex.org/W2906920259","https://openalex.org/W2971292190","https://openalex.org/W2971296908","https://openalex.org/W2998702515","https://openalex.org/W3046206337","https://openalex.org/W3115479046","https://openalex.org/W3201221410","https://openalex.org/W4210931461","https://openalex.org/W4224057314","https://openalex.org/W4287867774","https://openalex.org/W4288064589","https://openalex.org/W4385245566","https://openalex.org/W6637131181","https://openalex.org/W6682132143","https://openalex.org/W6755207826","https://openalex.org/W6757904846","https://openalex.org/W6767111925","https://openalex.org/W6767904763","https://openalex.org/W6773642575"],"related_works":["https://openalex.org/W2770234245","https://openalex.org/W96612179","https://openalex.org/W4229499248","https://openalex.org/W2566006169","https://openalex.org/W1567818861","https://openalex.org/W2987774938","https://openalex.org/W4256492088","https://openalex.org/W632915154","https://openalex.org/W2055733372","https://openalex.org/W3022067003"],"abstract_inverted_index":{"The":[0,175],"fairness":[1,31],"&":[2,235],"bias":[3,33],"of":[4,7,73,97,152,202],"narrow":[5],"coverage":[6,72],"AI":[8,13,18,59],"becomes":[9],"another":[10],"challenge":[11],"for":[12,131,173,224,231],"researchers.":[14],"If":[15],"a":[16,21,53,61,135,156,210],"commercial":[17],"trains":[19],"with":[20,160],"biased":[22],"dataset,":[23],"there":[24],"will":[25],"be":[26,50,116],"severe":[27],"gender":[28],"or":[29,63],"racial":[30],"and":[32,82,171,233,255],"issues.":[34,121],"[43,44].":[35],"Since":[36],"the":[37,46,74,78,85,102,105,126,161,199,239,248,253],"researchers":[38],"use":[39],"primary":[40],"language":[41,128],"datasets":[42],"to":[43,80,93,118,140,191],"train":[44],"AI,":[45],"broad":[47],"audience":[48,79],"cannot":[49],"satisfied":[51],"if":[52,84],"novel":[54],"LLM":[55],"(Large":[56],"Language":[57],"Model)":[58],"shows":[60],"knowledge":[62],"creativity":[64],"limitation":[65],"on":[66,125],"their":[67],"specific":[68],"spoken":[69],"language.":[70],"Narrow":[71],"LLMs":[75],"can":[76,115,246],"lead":[77],"misinterpretation":[81],"confusion":[83],"service":[86],"involves":[87],"STT":[88,132],"(Speech-To-Text).":[89],"In":[90,238],"this":[91,95],"paper,":[92],"overcome":[94],"issue":[96],"data":[98],"diversity,":[99],"we":[100,154,168,241],"propose":[101],"idea":[103,201,245],"that":[104,114,243],"embedded,":[106],"extracted":[107,176],"features":[108,185],"have":[109],"captured":[110],"semantic":[111],"proximity":[112],"information":[113],"useful":[117],"mitigate":[119,247],"diversity":[120,249],"This":[122],"project":[123],"focused":[124],"Korean":[127,162],"food":[129],"dataset":[130,164],"services,":[133],"where":[134],"narrow-trained":[136],"A.I.":[137],"is":[138],"prone":[139],"show":[141],"its":[142],"limitations,":[143],"such":[144],"as":[145],"lifestyle-related":[146],"elements.":[147],"To":[148],"present":[149,220],"our":[150,244],"proof":[151],"concept,":[153],"trained":[155],"baseline":[157],"model,":[158],"GPT2,":[159],"Wikipedia":[163],"in":[165],"2022.":[166],"Then,":[167],"employed":[169],"DistilBERT":[170],"KoBERT":[172],"comparison.":[174],"<italic":[177,181],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[178,182],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">hidden</i>":[179],"_":[180],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">state</i>":[183],"_output":[184],"from":[186],"each":[187],"model":[188,254],"were":[189],"utilized":[190],"build":[192],"feature-extraction-based":[193],"text":[194],"search":[195],"engines.":[196],"We":[197,218],"used":[198],"same":[200],"Local":[203],"Sensitive":[204],"Hashing":[205],"(LSH)":[206],"but":[207],"effectively":[208],"located":[209],"similar":[211],"hash":[212],"by":[213],"applying":[214],"transposed":[215],"weights":[216],"[38].":[217],"also":[219],"conventional":[221],"classification":[222],"benchmarks":[223],"performance":[225],"comparison":[226],"using":[227],"top-k":[228],"measurements,":[229],"times":[230],"training":[232],"memory":[234],"disc":[236],"consumptions.":[237],"discussion,":[240],"proposed":[242],"problem":[250],"without":[251],"re-training":[252],"tokenizer.":[256]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
