{"id":"https://openalex.org/W2211796614","doi":"https://doi.org/10.1145/2833089","title":"Integrated Parallel Sentence and Fragment Extraction from Comparable Corpora","display_name":"Integrated Parallel Sentence and Fragment Extraction from Comparable Corpora","publication_year":2015,"publication_date":"2015-12-11","ids":{"openalex":"https://openalex.org/W2211796614","doi":"https://doi.org/10.1145/2833089","mag":"2211796614"},"language":"en","primary_location":{"id":"doi:10.1145/2833089","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2833089","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102757632","display_name":"Chenhui Chu","orcid":"https://orcid.org/0000-0001-9848-6384"},"institutions":[{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Chenhui Chu","raw_affiliation_strings":["Japan Science and Technology Agency, Honcho, Kawaguchi-shi, Saitama, Japan"],"affiliations":[{"raw_affiliation_string":"Japan Science and Technology Agency, Honcho, Kawaguchi-shi, Saitama, Japan","institution_ids":["https://openalex.org/I4210086780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070222190","display_name":"Toshiaki Nakazawa","orcid":null},"institutions":[{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Toshiaki Nakazawa","raw_affiliation_strings":["Japan Science and Technology Agency, Honcho, Kawaguchi-shi, Saitama, Japan"],"affiliations":[{"raw_affiliation_string":"Japan Science and Technology Agency, Honcho, Kawaguchi-shi, Saitama, Japan","institution_ids":["https://openalex.org/I4210086780"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028836340","display_name":"Sadao Kurohashi","orcid":"https://orcid.org/0000-0001-5398-8399"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Sadao Kurohashi","raw_affiliation_strings":["Kyoto University, Sakyo-ku, Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"Kyoto University, Sakyo-ku, Kyoto, Japan","institution_ids":["https://openalex.org/I22299242"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102757632"],"corresponding_institution_ids":["https://openalex.org/I4210086780"],"apc_list":null,"apc_paid":null,"fwci":1.3354,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.86657657,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"15","issue":"2","first_page":"1","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9686999917030334,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parallel-corpora","display_name":"Parallel corpora","score":0.9211742281913757},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.865623950958252},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.7264284491539001},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6619994640350342},{"id":"https://openalex.org/keywords/fragment","display_name":"Fragment (logic)","score":0.6501579880714417},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6246046423912048},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5740900039672852},{"id":"https://openalex.org/keywords/lexicon","display_name":"Lexicon","score":0.5270845293998718},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.518988311290741},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.492480605840683},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.32109588384628296},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.19173762202262878}],"concepts":[{"id":"https://openalex.org/C2985367798","wikidata":"https://www.wikidata.org/wiki/Q1346592","display_name":"Parallel corpora","level":3,"score":0.9211742281913757},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.865623950958252},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.7264284491539001},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6619994640350342},{"id":"https://openalex.org/C2776235265","wikidata":"https://www.wikidata.org/wiki/Q18392052","display_name":"Fragment (logic)","level":2,"score":0.6501579880714417},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6246046423912048},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5740900039672852},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.5270845293998718},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.518988311290741},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.492480605840683},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32109588384628296},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.19173762202262878},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2833089","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2833089","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},{"id":"pmh:oai:repository.kulib.kyoto-u.ac.jp:2433/265843","is_oa":false,"landing_page_url":"http://hdl.handle.net/2433/265843","pdf_url":null,"source":{"id":"https://openalex.org/S4306401454","display_name":"Kyoto University Research Information Repository (Kyoto University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I22299242","host_organization_name":"Kyoto University","host_organization_lineage":["https://openalex.org/I22299242"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"journal article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7900000214576721}],"awards":[{"id":"https://openalex.org/G3236194794","display_name":null,"funder_award_id":"Grant-in-Aid","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G3459562248","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4874944895","display_name":null,"funder_award_id":"-in-Aid","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G5256887504","display_name":null,"funder_award_id":"Japan Society for the Promotion of Science (JSPS)","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G5396239172","display_name":null,"funder_award_id":"Grant-in-Aid for JSPS Fellows","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G6270309778","display_name":"\u5727\u7e2e\u6027\u52a0\u5de5\u306b\u304a\u3051\u308b\u91d1\u5c5e\u6750\u6599\u306e\u5851\u6027\u5909\u5f62\u9650","funder_award_id":"52305","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G7731861387","display_name":"\u5317\u4fe1\u5730\u65b9\u306b\u304a\u3051\u308b\u9ad8\u793e\u5c71\u6591\u5c3e\u5c71\u306e\u6c17\u8c61\u306b\u304a\u3088\u307c\u3059\u5f71\u97ff\u306e\u5c0f\u6c17\u5019\u7684\u306a\u7a76\u660e","funder_award_id":"23057","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G7752643416","display_name":null,"funder_award_id":"Japan","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320320273","display_name":"University of Cambridge","ror":"https://ror.org/013meh722"},{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W4707553","https://openalex.org/W11209375","https://openalex.org/W14574270","https://openalex.org/W75158669","https://openalex.org/W82889912","https://openalex.org/W92412080","https://openalex.org/W103908725","https://openalex.org/W222053410","https://openalex.org/W1490062449","https://openalex.org/W1524281572","https://openalex.org/W1807664792","https://openalex.org/W1916559533","https://openalex.org/W1963760715","https://openalex.org/W1966253631","https://openalex.org/W1982474572","https://openalex.org/W1988012253","https://openalex.org/W2006969979","https://openalex.org/W2024181699","https://openalex.org/W2047295649","https://openalex.org/W2065565011","https://openalex.org/W2101096097","https://openalex.org/W2101105183","https://openalex.org/W2104103102","https://openalex.org/W2105673178","https://openalex.org/W2107695330","https://openalex.org/W2116713744","https://openalex.org/W2118495769","https://openalex.org/W2127876534","https://openalex.org/W2130919220","https://openalex.org/W2140903445","https://openalex.org/W2141068210","https://openalex.org/W2145662801","https://openalex.org/W2146574666","https://openalex.org/W2153635508","https://openalex.org/W2156985047","https://openalex.org/W2158310256","https://openalex.org/W2166098990","https://openalex.org/W2250491765","https://openalex.org/W2250977808","https://openalex.org/W2251076467","https://openalex.org/W2251249103","https://openalex.org/W2251366179","https://openalex.org/W2251449587","https://openalex.org/W2251881202","https://openalex.org/W2251912702","https://openalex.org/W2322902172","https://openalex.org/W2524182563","https://openalex.org/W2610593052","https://openalex.org/W2613000335","https://openalex.org/W2736790173","https://openalex.org/W2737831686","https://openalex.org/W2759083144","https://openalex.org/W2799915114","https://openalex.org/W2892737606","https://openalex.org/W2912761163","https://openalex.org/W2930957955","https://openalex.org/W2998215494","https://openalex.org/W3197138970","https://openalex.org/W3202296894","https://openalex.org/W4240795705","https://openalex.org/W4242605130"],"related_works":["https://openalex.org/W2786253471","https://openalex.org/W3175595715","https://openalex.org/W2604275745","https://openalex.org/W2986030184","https://openalex.org/W2104907655","https://openalex.org/W2985215540","https://openalex.org/W4307459710","https://openalex.org/W3155572818","https://openalex.org/W4293584592","https://openalex.org/W4285266806"],"abstract_inverted_index":{"Parallel":[0,83],"corpora":[1,22],"are":[2,11,23],"crucial":[3],"for":[4,14,41,96,114],"statistical":[5],"machine":[6],"translation":[7],"(SMT);":[8],"however,":[9],"they":[10],"quite":[12],"scarce":[13],"most":[15],"language":[16],"pairs":[17],"and":[18,56,94,109,149,176],"domains.":[19],"As":[20],"comparable":[21,59,72,81,128],"far":[24],"more":[25],"available,":[26],"many":[27],"studies":[28,117],"have":[29,118],"been":[30],"conducted":[31],"to":[32,51,67,122,143,154],"extract":[33,52,76,124],"either":[34],"parallel":[35,54,64,69,77,90,97,125,134,146,158,178],"sentences":[36,55,70],"or":[37],"fragments":[38,57,78,126],"from":[39,58,71,79,127],"them":[40],"SMT.":[42],"In":[43],"this":[44],"article,":[45],"we":[46],"propose":[47,131],"an":[48,132,140,150],"integrated":[49],"system":[50,183],"both":[53],"corpora.":[60],"We":[61,74,100,130],"first":[62],"apply":[63],"sentence":[65,84,91,98],"extraction":[66,85,136],"identify":[68,155],"sentences.":[73,82,129],"then":[75],"the":[80,145,156,164,177],"is":[86],"based":[87],"on":[88,163],"a":[89,105],"candidate":[92],"filter":[93,153],"classifier":[95],"identification.":[99],"improve":[101],"it":[102,120],"by":[103,181],"proposing":[104],"novel":[106,111],"filtering":[107],"strategy":[108],"three":[110],"feature":[112],"sets":[113],"classification.":[115],"Previous":[116],"found":[119],"difficult":[121],"accurately":[123],"accurate":[133,151],"fragment":[135,147],"method":[137],"that":[138,168],"uses":[139],"alignment":[141],"model":[142],"locate":[144],"candidates":[148],"lexicon-based":[152],"truly":[157],"fragments.":[159],"A":[160],"case":[161],"study":[162],"Chinese--Japanese":[165],"Wikipedia":[166],"indicates":[167],"our":[169,182],"proposed":[170,174],"methods":[171],"outperform":[172],"previously":[173],"methods,":[175],"data":[179],"extracted":[180],"significantly":[184],"improves":[185],"SMT":[186],"performance.":[187]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":4},{"year":2017,"cited_by_count":3}],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
