{"id":"https://openalex.org/W2030555214","doi":"https://doi.org/10.1109/isi.2013.6578781","title":"Use of latent semantic indexing to identify name variants in large data collections","display_name":"Use of latent semantic indexing to identify name variants in large data collections","publication_year":2013,"publication_date":"2013-06-01","ids":{"openalex":"https://openalex.org/W2030555214","doi":"https://doi.org/10.1109/isi.2013.6578781","mag":"2030555214"},"language":"en","primary_location":{"id":"doi:10.1109/isi.2013.6578781","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isi.2013.6578781","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE International Conference on Intelligence and Security Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088893823","display_name":"Roger Bradford","orcid":"https://orcid.org/0000-0003-1750-3125"},"institutions":[{"id":"https://openalex.org/I138285227","display_name":"Agilent Technologies (United States)","ror":"https://ror.org/02tryst02","country_code":"US","type":"company","lineage":["https://openalex.org/I138285227"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"R.B. Bradford","raw_affiliation_strings":["Agilent Technologies, Inc., Chantilly, VA, USA","Agilex Technol., Chantilly, VA, USA"],"affiliations":[{"raw_affiliation_string":"Agilent Technologies, Inc., Chantilly, VA, USA","institution_ids":["https://openalex.org/I138285227"]},{"raw_affiliation_string":"Agilex Technol., Chantilly, VA, USA","institution_ids":["https://openalex.org/I138285227"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5088893823"],"corresponding_institution_ids":["https://openalex.org/I138285227"],"apc_list":null,"apc_paid":null,"fwci":0.411,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.68808885,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"27","last_page":"32"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9851999878883362,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8591134548187256},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6308608055114746},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5988291501998901},{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.5460654497146606},{"id":"https://openalex.org/keywords/transliteration","display_name":"Transliteration","score":0.5460642576217651},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5157513618469238},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.5117330551147461},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.497936487197876},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4976787865161896},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41389259696006775},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.2220507264137268}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8591134548187256},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6308608055114746},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5988291501998901},{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.5460654497146606},{"id":"https://openalex.org/C520968082","wikidata":"https://www.wikidata.org/wiki/Q134550","display_name":"Transliteration","level":2,"score":0.5460642576217651},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5157513618469238},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.5117330551147461},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.497936487197876},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4976787865161896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41389259696006775},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2220507264137268},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isi.2013.6578781","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isi.2013.6578781","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE International Conference on Intelligence and Security Informatics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W509898","https://openalex.org/W19758219","https://openalex.org/W41404523","https://openalex.org/W142543598","https://openalex.org/W155741346","https://openalex.org/W1489107989","https://openalex.org/W1595412514","https://openalex.org/W1602633340","https://openalex.org/W1980804566","https://openalex.org/W1982149803","https://openalex.org/W2039001182","https://openalex.org/W2050050080","https://openalex.org/W2053713809","https://openalex.org/W2073471108","https://openalex.org/W2087447526","https://openalex.org/W2093537029","https://openalex.org/W2104511295","https://openalex.org/W2129558264","https://openalex.org/W2135223301","https://openalex.org/W2138062899","https://openalex.org/W2155476369","https://openalex.org/W2166555323","https://openalex.org/W4230502578","https://openalex.org/W4252791511","https://openalex.org/W4285719527","https://openalex.org/W4291172695","https://openalex.org/W6600812996","https://openalex.org/W6601696544","https://openalex.org/W6605769840","https://openalex.org/W6682860659","https://openalex.org/W6684626262"],"related_works":["https://openalex.org/W3153459181","https://openalex.org/W2147866274","https://openalex.org/W2350015575","https://openalex.org/W2371976984","https://openalex.org/W2352160949","https://openalex.org/W2378436233","https://openalex.org/W2251148428","https://openalex.org/W2354361711","https://openalex.org/W4362573963","https://openalex.org/W2176088470"],"abstract_inverted_index":{"In":[0,19,149],"many":[1],"intelligence":[2],"and":[3,16,128],"security":[4],"informatics":[5],"applications,":[6,21],"named":[7],"entities":[8],"constitute":[9],"a":[10,30,48,77,186,197],"particularly":[11],"important":[12],"element":[13],"of":[14,26,43,51,58,71,79,90,144,178],"queries":[15],"analytic":[17],"operations.":[18],"such":[20],"variations":[22],"in":[23,61,105,135],"the":[24,107,109,112,142,152,176],"rendering":[25],"entity":[27],"names":[28,42],"present":[29],"pervasive":[31],"problem.":[32],"The":[33,139],"problem":[34],"is":[35],"most":[36],"frequently":[37],"encountered":[38],"when":[39],"dealing":[40],"with":[41],"persons.":[44],"For":[45],"person":[46],"names,":[47],"wide":[49],"variety":[50],"factors":[52],"may":[53],"lead":[54],"to":[55,123,162,190],"variations:":[56],"use":[57,70],"nicknames,":[59],"differences":[60],"given":[62],"name":[63,87,132,180,191],"/":[64],"surname":[65],"order,":[66],"misspellings,":[67],"phonetic":[68,96],"renderings,":[69],"different":[72],"transliteration":[73],"systems,":[74],"etc.":[75],"Historically,":[76],"number":[78],"methods":[80],"have":[81,92],"been":[82,93],"developed":[83],"for":[84,131],"generating":[85],"possible":[86],"variants.":[88],"Most":[89],"these":[91,115],"based":[94],"on":[95],"similarities,":[97],"edit":[98],"distance,":[99],"or":[100],"longest":[101],"common":[102],"substrings.":[103],"However,":[104],"general,":[106],"larger":[108],"data":[110],"collection,":[111],"less":[113],"effective":[114],"techniques":[116],"are.":[117],"This":[118,173,183],"paper":[119,184],"presents":[120],"an":[121],"approach":[122,140,189,199],"attaining":[124],"both":[125],"high":[126,129],"precision":[127,177,203],"recall":[130],"variant":[133,181,192],"identification":[134],"large":[136],"text":[137],"collections.":[138],"exploits":[141],"technique":[143],"latent":[145],"semantic":[146],"indexing":[147],"(LSI).":[148],"this":[150],"approach,":[151],"contextual":[153],"information":[154],"provided":[155],"by":[156,170],"LSI":[157],"allows":[158],"likely":[159],"true":[160],"variants":[161,168],"be":[163],"selected":[164],"from":[165],"multiple":[166],"candidate":[167,179],"generated":[169],"other":[171],"techniques.":[172],"significantly":[174],"improves":[175],"results.":[182],"describes":[185],"basic":[187],"LSI-augmented":[188],"identification,":[193],"as":[194,196],"well":[195],"new":[198],"that":[200],"yields":[201],"additional":[202],"improvements.":[204]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
