{"id":"https://openalex.org/W2742030900","doi":"https://doi.org/10.18653/v1/w17-2330","title":"Protein Word Detection using Text Segmentation Techniques","display_name":"Protein Word Detection using Text Segmentation Techniques","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2742030900","doi":"https://doi.org/10.18653/v1/w17-2330","mag":"2742030900"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w17-2330","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-2330","pdf_url":"https://www.aclweb.org/anthology/W17-2330.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioNLP 2017","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W17-2330.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110567638","display_name":"Devi Ganesan","orcid":null},"institutions":[{"id":"https://openalex.org/I24676775","display_name":"Indian Institute of Technology Madras","ror":"https://ror.org/03v0r5n49","country_code":"IN","type":"facility","lineage":["https://openalex.org/I24676775"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Devi Ganesan","raw_affiliation_strings":["Department of CSE IIT Madras Chennai-600036, India"],"affiliations":[{"raw_affiliation_string":"Department of CSE IIT Madras Chennai-600036, India","institution_ids":["https://openalex.org/I24676775"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048099130","display_name":"Ashish V. Tendulkar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ashish V. Tendulkar","raw_affiliation_strings":["Google Inc., Hyderabad-500084, India"],"affiliations":[{"raw_affiliation_string":"Google Inc., Hyderabad-500084, India","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091240335","display_name":"Sutanu Chakraborti","orcid":null},"institutions":[{"id":"https://openalex.org/I24676775","display_name":"Indian Institute of Technology Madras","ror":"https://ror.org/03v0r5n49","country_code":"IN","type":"facility","lineage":["https://openalex.org/I24676775"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sutanu Chakraborti","raw_affiliation_strings":["Department of CSE IIT Madras Chennai-600036, India"],"affiliations":[{"raw_affiliation_string":"Department of CSE IIT Madras Chennai-600036, India","institution_ids":["https://openalex.org/I24676775"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5110567638"],"corresponding_institution_ids":["https://openalex.org/I24676775"],"apc_list":null,"apc_paid":null,"fwci":0.1063,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.48109599,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"238","last_page":"246"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6992905139923096},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6882920861244202},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6512894630432129},{"id":"https://openalex.org/keywords/complement","display_name":"Complement (music)","score":0.6325340270996094},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.6224915981292725},{"id":"https://openalex.org/keywords/premise","display_name":"Premise","score":0.601983904838562},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5564273595809937},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5384902954101562},{"id":"https://openalex.org/keywords/decipherment","display_name":"Decipherment","score":0.512526273727417},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.48082005977630615},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4736279845237732},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4255147874355316},{"id":"https://openalex.org/keywords/parallels","display_name":"Parallels","score":0.4209640622138977},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3135135769844055},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.11154428124427795},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08280447125434875}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6992905139923096},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6882920861244202},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6512894630432129},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.6325340270996094},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.6224915981292725},{"id":"https://openalex.org/C2778023277","wikidata":"https://www.wikidata.org/wiki/Q321703","display_name":"Premise","level":2,"score":0.601983904838562},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5564273595809937},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5384902954101562},{"id":"https://openalex.org/C2778467380","wikidata":"https://www.wikidata.org/wiki/Q1345443","display_name":"Decipherment","level":2,"score":0.512526273727417},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.48082005977630615},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4736279845237732},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4255147874355316},{"id":"https://openalex.org/C2775922551","wikidata":"https://www.wikidata.org/wiki/Q7135033","display_name":"Parallels","level":2,"score":0.4209640622138977},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3135135769844055},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.11154428124427795},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08280447125434875},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C127716648","wikidata":"https://www.wikidata.org/wiki/Q104053","display_name":"Phenotype","level":3,"score":0.0},{"id":"https://openalex.org/C188082640","wikidata":"https://www.wikidata.org/wiki/Q1780899","display_name":"Complementation","level":4,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/w17-2330","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-2330","pdf_url":"https://www.aclweb.org/anthology/W17-2330.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioNLP 2017","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/w17-2330","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-2330","pdf_url":"https://www.aclweb.org/anthology/W17-2330.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioNLP 2017","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.8500000238418579,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2742030900.pdf","grobid_xml":"https://content.openalex.org/works/W2742030900.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1528203680","https://openalex.org/W1543636183","https://openalex.org/W1585280831","https://openalex.org/W1606593898","https://openalex.org/W1972177221","https://openalex.org/W1991156902","https://openalex.org/W2010904931","https://openalex.org/W2022201897","https://openalex.org/W2041404167","https://openalex.org/W2085277871","https://openalex.org/W2093205346","https://openalex.org/W2107158607","https://openalex.org/W2114535528","https://openalex.org/W2119409375","https://openalex.org/W2123010948","https://openalex.org/W2133657429","https://openalex.org/W2135611423","https://openalex.org/W2140991203","https://openalex.org/W2161072217","https://openalex.org/W2169147927","https://openalex.org/W2250582181","https://openalex.org/W2252139019","https://openalex.org/W2294970769","https://openalex.org/W2978725006"],"related_works":["https://openalex.org/W2277794996","https://openalex.org/W2353267881","https://openalex.org/W2087163944","https://openalex.org/W4403457682","https://openalex.org/W2313343935","https://openalex.org/W2123799460","https://openalex.org/W4249394616","https://openalex.org/W2011818515","https://openalex.org/W4243114292","https://openalex.org/W4294940244"],"abstract_inverted_index":{"Literature":[0],"in":[1,13,107,115],"Molecular":[2],"Biology":[3],"is":[4,41],"abundant":[5],"with":[6],"linguistic":[7],"metaphors.":[8],"There":[9],"have":[10,32],"been":[11],"works":[12],"the":[14,27,44,65,74,83,96,108],"past":[15],"that":[16,30],"attempt":[17,51],"to":[18,43,52,61,82,102],"draw":[19],"parallels":[20],"between":[21],"linguistics":[22],"and":[23],"biology,":[24],"driven":[25,105],"by":[26],"fundamental":[28],"premise":[29],"proteins":[31],"a":[33,54,122],"language":[34,59],"of":[35,46,67,76,85,98],"their":[36],"own.":[37],"Since":[38],"word":[39],"detection":[40],"crucial":[42],"decipherment":[45],"any":[47],"unknown":[48],"language,":[49],"we":[50,72,94],"establish":[53],"problem":[55],"mapping":[56],"from":[57,89],"natural":[58],"text":[60,79,109],"protein":[62,90,128,131],"sequences":[63],"at":[64],"level":[66],"words.":[68],"Towards":[69],"this":[70],"end,":[71],"explore":[73],"use":[75],"an":[77],"unsupervised":[78],"segmentation":[80,110],"algorithm":[81],"task":[84],"extracting":[86],"\"biological":[87],"words\"":[88],"sequences.":[91],"In":[92],"particular,":[93],"demonstrate":[95],"effectiveness":[97],"using":[99],"domain":[100],"knowledge":[101],"complement":[103],"data":[104],"approaches":[106],"task,":[111],"as":[112,114],"well":[113],"its":[116],"biological":[117],"counterpart.":[118],"We":[119],"also":[120],"propose":[121],"novel":[123],"extrinsic":[124],"evaluation":[125],"measure":[126],"for":[127],"words":[129],"through":[130],"family":[132],"classification.":[133],"'B','IRDONTHETREE'":[134],"'BI','RD','ONTHET','R','E','E'":[135],"'B','I','R','D','ONTHET','REE'":[136],"'BIR','D','ONT','HE','TREE'":[137],"'BIRDON','THE','TREE'":[138],"'BIRD','ON','THETREE'":[139]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
