{"id":"https://openalex.org/W2050929561","doi":"https://doi.org/10.1145/1621995.1622040","title":"Experience report","display_name":"Experience report","publication_year":2009,"publication_date":"2009-10-05","ids":{"openalex":"https://openalex.org/W2050929561","doi":"https://doi.org/10.1145/1621995.1622040","mag":"2050929561"},"language":"en","primary_location":{"id":"doi:10.1145/1621995.1622040","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1621995.1622040","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM international conference on Design of communication","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054677783","display_name":"Youngik Yang","orcid":"https://orcid.org/0000-0003-3219-4471"},"institutions":[{"id":"https://openalex.org/I4210119109","display_name":"Indiana University Bloomington","ror":"https://ror.org/02k40bc56","country_code":"US","type":"education","lineage":["https://openalex.org/I4210119109","https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Youngik Yang","raw_affiliation_strings":["Indiana University, Bloomington, IN, USA"],"affiliations":[{"raw_affiliation_string":"Indiana University, Bloomington, IN, USA","institution_ids":["https://openalex.org/I4210119109"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100460452","display_name":"Sun Kim","orcid":"https://orcid.org/0000-0003-3072-6649"},"institutions":[{"id":"https://openalex.org/I4210119109","display_name":"Indiana University Bloomington","ror":"https://ror.org/02k40bc56","country_code":"US","type":"education","lineage":["https://openalex.org/I4210119109","https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sun Kim","raw_affiliation_strings":["Indiana University, Bloomington, IN, USA"],"affiliations":[{"raw_affiliation_string":"Indiana University, Bloomington, IN, USA","institution_ids":["https://openalex.org/I4210119109"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5054677783"],"corresponding_institution_ids":["https://openalex.org/I4210119109"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11708329,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"227","last_page":"232"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.8861451148986816},{"id":"https://openalex.org/keywords/gene-annotation","display_name":"Gene Annotation","score":0.6443831324577332},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6068675518035889},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.6067289710044861},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.5704432725906372},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5266663432121277},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.4959143102169037},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.4880031645298004},{"id":"https://openalex.org/keywords/alignment-free-sequence-analysis","display_name":"Alignment-free sequence analysis","score":0.48251816630363464},{"id":"https://openalex.org/keywords/genome-project","display_name":"Genome project","score":0.4737951159477234},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.46023061871528625},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.45547887682914734},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.42911583185195923},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.35526666045188904},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3236038088798523},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.29559993743896484},{"id":"https://openalex.org/keywords/sequence-alignment","display_name":"Sequence alignment","score":0.248796284198761},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2483942210674286},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.23358154296875},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.1410401165485382}],"concepts":[{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.8861451148986816},{"id":"https://openalex.org/C2908923196","wikidata":"https://www.wikidata.org/wiki/Q5205742","display_name":"Gene Annotation","level":4,"score":0.6443831324577332},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6068675518035889},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.6067289710044861},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.5704432725906372},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5266663432121277},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.4959143102169037},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.4880031645298004},{"id":"https://openalex.org/C180384323","wikidata":"https://www.wikidata.org/wiki/Q16335137","display_name":"Alignment-free sequence analysis","level":5,"score":0.48251816630363464},{"id":"https://openalex.org/C89566754","wikidata":"https://www.wikidata.org/wiki/Q2273828","display_name":"Genome project","level":4,"score":0.4737951159477234},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.46023061871528625},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.45547887682914734},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.42911583185195923},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.35526666045188904},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3236038088798523},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.29559993743896484},{"id":"https://openalex.org/C45484198","wikidata":"https://www.wikidata.org/wiki/Q827246","display_name":"Sequence alignment","level":4,"score":0.248796284198761},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2483942210674286},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.23358154296875},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.1410401165485382},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C167625842","wikidata":"https://www.wikidata.org/wiki/Q899763","display_name":"Peptide sequence","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1621995.1622040","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1621995.1622040","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM international conference on Design of communication","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4399999976158142,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W136846643","https://openalex.org/W1990453950","https://openalex.org/W1995117714","https://openalex.org/W2029195137","https://openalex.org/W2038721957","https://openalex.org/W2040022355","https://openalex.org/W2055043387","https://openalex.org/W2102429534","https://openalex.org/W2103017472","https://openalex.org/W2122545791","https://openalex.org/W2144896636","https://openalex.org/W2161062388","https://openalex.org/W6647881114"],"related_works":["https://openalex.org/W2162782320","https://openalex.org/W2059565715","https://openalex.org/W2279845149","https://openalex.org/W2078942911","https://openalex.org/W2294108133","https://openalex.org/W4280623106","https://openalex.org/W4231584750","https://openalex.org/W3034139913","https://openalex.org/W2793945238","https://openalex.org/W2101269698"],"abstract_inverted_index":{"Annotating":[0],"function":[1,66,78,121,131,160,273],"of":[2,7,37,39,52,76,100,108,113,122,158,240,252,255],"genes":[3,101,110],"accurately":[4],"is":[5,68,79,96,116,133,202],"one":[6],"the":[8,23,30,43,50,61,85,104,120,156],"most":[9,90],"important":[10],"tasks":[11],"in":[12,111,174,183,187],"molecular":[13],"biology":[14,241],"and":[15,151,222,235,246],"medical":[16],"sciences.":[17],"The":[18,89],"new":[19],"sequencing":[20,26,29,40,45],"technology,":[21,27],"called":[22],"next":[24],"generation":[25],"made":[28],"whole":[31],"genomes":[32],"possible":[33],"with":[34,207,250],"a":[35,48,80,123,137,167,188,253],"fraction":[36],"cost":[38],"by":[41],"using":[42,238],"traditional":[44],"technology.":[46],"As":[47,136],"result,":[49,138],"amount":[51],"sequence":[53,105,114,152],"data":[54],"has":[55],"been":[56,165],"growing":[57],"very":[58],"rapidly,":[59],"but":[60,269],"computational":[62,168],"method":[63,169,259],"for":[64,170],"gene":[65,77,93,130,159,172,227,236,272],"annotation":[67,75,94,99,107,132,150,161,173,186,228],"yet":[69],"to":[70,83,97,119,262],"be":[71],"fully":[72],"developed.":[73],"Thus":[74],"serious":[81],"bottleneck":[82],"achieving":[84],"high-throughput":[86],"genome":[87,139,185],"projects.":[88],"commonly":[91,212],"used":[92,196,211,213],"technique":[95],"transfer":[98],"based":[102,129],"on":[103,143],"similarity;":[106],"top-ranked":[109],"terms":[112],"similarity":[115,153,201],"simply":[117],"transferred":[118],"target":[124],"gene.":[125],"However,":[126],"this":[127,177],"sequence-similarity":[128],"often":[134],"incorrect.":[135],"projects":[140],"still":[141],"rely":[142],"expensive,":[144],"error-prone,":[145],"labor-intensive,":[146],"manual":[147],"process.":[148],"Combining":[149],"can":[154],"improve":[155],"accuracy":[157],"significantly.":[162],"We":[163],"have":[164],"developing":[166],"comparing":[171,184],"text.":[175],"In":[176,248],"paper,":[178],"we":[179,195,210],"will":[180],"discuss":[181],"issues":[182],"text":[189,214],"format.":[190],"To":[191],"compute":[192],"textual":[193,208],"similarity,":[194],"cosine":[197,200],"similarity.":[198],"Since":[199],"effective":[203],"only":[204],"after":[205],"preprocessing":[206,215,230],"variations,":[209],"techniques":[216],"such":[217,231,243],"as":[218,224,226,232,244],"removing":[219],"stop":[220],"words":[221],"stemming":[223],"well":[225],"specific":[229],"handling":[233],"synonyms":[234],"symbols":[237],"databases":[239],"terminologies":[242],"BioThesaurus":[245],"MeSH.":[247],"experiments":[249],"annotations":[251],"number":[254],"bacterial":[256],"genomes,":[257],"our":[258],"was":[260],"able":[261],"handle":[263],"many":[264],"difficult":[265],"cases":[266],"(syntactically":[267],"different":[268],"semantically":[270],"equivalent":[271],"annotations)":[274],"correctly.":[275]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
