{"id":"https://openalex.org/W2421014960","doi":"https://doi.org/10.1017/s1351324916000164","title":"Building a multi-domain comparable corpus using a learning to rank method","display_name":"Building a multi-domain comparable corpus using a learning to rank method","publication_year":2016,"publication_date":"2016-06-15","ids":{"openalex":"https://openalex.org/W2421014960","doi":"https://doi.org/10.1017/s1351324916000164","mag":"2421014960"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324916000164","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324916000164","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103185916","display_name":"Razieh Rahimi","orcid":"https://orcid.org/0000-0002-2584-3309"},"institutions":[{"id":"https://openalex.org/I23946033","display_name":"University of Tehran","ror":"https://ror.org/05vf56z40","country_code":"IR","type":"education","lineage":["https://openalex.org/I23946033"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"RAZIEH RAHIMI","raw_affiliation_strings":["School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran e-mails:","University of TEHRAN"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran e-mails:","institution_ids":["https://openalex.org/I23946033"]},{"raw_affiliation_string":"University of TEHRAN","institution_ids":["https://openalex.org/I23946033"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055494428","display_name":"Azadeh Shakery","orcid":"https://orcid.org/0000-0003-1799-8340"},"institutions":[{"id":"https://openalex.org/I23946033","display_name":"University of Tehran","ror":"https://ror.org/05vf56z40","country_code":"IR","type":"education","lineage":["https://openalex.org/I23946033"]},{"id":"https://openalex.org/I4210146419","display_name":"Institute for Research in Fundamental Sciences","ror":"https://ror.org/04xreqs31","country_code":"IR","type":"facility","lineage":["https://openalex.org/I4210146419"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"AZADEH SHAKERY","raw_affiliation_strings":["School of Computer Science, Institute for Research in Fundamental Sciences (IPM), Tehran, Iran","School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran e-mails:"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Institute for Research in Fundamental Sciences (IPM), Tehran, Iran","institution_ids":["https://openalex.org/I4210146419"]},{"raw_affiliation_string":"School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran e-mails:","institution_ids":["https://openalex.org/I23946033"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069233580","display_name":"Javid Dadashkarimi","orcid":"https://orcid.org/0000-0002-1905-547X"},"institutions":[{"id":"https://openalex.org/I23946033","display_name":"University of Tehran","ror":"https://ror.org/05vf56z40","country_code":"IR","type":"education","lineage":["https://openalex.org/I23946033"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"JAVID DADASHKARIMI","raw_affiliation_strings":["School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran e-mails:","University of TEHRAN"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran e-mails:","institution_ids":["https://openalex.org/I23946033"]},{"raw_affiliation_string":"University of TEHRAN","institution_ids":["https://openalex.org/I23946033"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032426533","display_name":"Mozhdeh Ariannezhad","orcid":"https://orcid.org/0000-0002-1113-8094"},"institutions":[{"id":"https://openalex.org/I23946033","display_name":"University of Tehran","ror":"https://ror.org/05vf56z40","country_code":"IR","type":"education","lineage":["https://openalex.org/I23946033"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"MOZHDEH ARIANNEZHAD","raw_affiliation_strings":["School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran e-mails:","University of TEHRAN"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran e-mails:","institution_ids":["https://openalex.org/I23946033"]},{"raw_affiliation_string":"University of TEHRAN","institution_ids":["https://openalex.org/I23946033"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102906366","display_name":"Mostafa Dehghani","orcid":"https://orcid.org/0000-0002-9772-1095"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"MOSTAFA DEHGHANI","raw_affiliation_strings":["Institute for Logic, Language and Computation, University of Amsterdam, Amsterdam, The Netherlands e-mail:","University of Amsterdam"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute for Logic, Language and Computation, University of Amsterdam, Amsterdam, The Netherlands e-mail:","institution_ids":["https://openalex.org/I887064364"]},{"raw_affiliation_string":"University of Amsterdam","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028647264","display_name":"Hossein Esfahani","orcid":"https://orcid.org/0000-0002-2291-3412"},"institutions":[{"id":"https://openalex.org/I23946033","display_name":"University of Tehran","ror":"https://ror.org/05vf56z40","country_code":"IR","type":"education","lineage":["https://openalex.org/I23946033"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"HOSSEIN NASR ESFAHANI","raw_affiliation_strings":["School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran e-mails:","University of TEHRAN"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, College of Engineering, University of Tehran, Tehran, Iran e-mails:","institution_ids":["https://openalex.org/I23946033"]},{"raw_affiliation_string":"University of TEHRAN","institution_ids":["https://openalex.org/I23946033"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.0915,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.92895771,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"22","issue":"4","first_page":"627","last_page":"653"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8976091146469116},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.7048184871673584},{"id":"https://openalex.org/keywords/comparability","display_name":"Comparability","score":0.6979775428771973},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.641456127166748},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5855656862258911},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5625112056732178},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5513691902160645},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.5481042861938477},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5211524963378906},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4782796800136566},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4748176336288452},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.46392783522605896},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.457528680562973},{"id":"https://openalex.org/keywords/learning-to-rank","display_name":"Learning to rank","score":0.45141884684562683},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.42541080713272095},{"id":"https://openalex.org/keywords/directory","display_name":"Directory","score":0.42523202300071716}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8976091146469116},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.7048184871673584},{"id":"https://openalex.org/C197947376","wikidata":"https://www.wikidata.org/wiki/Q5155608","display_name":"Comparability","level":2,"score":0.6979775428771973},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.641456127166748},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5855656862258911},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5625112056732178},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5513691902160645},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.5481042861938477},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5211524963378906},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4782796800136566},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4748176336288452},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.46392783522605896},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.457528680562973},{"id":"https://openalex.org/C86037889","wikidata":"https://www.wikidata.org/wiki/Q4330127","display_name":"Learning to rank","level":3,"score":0.45141884684562683},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.42541080713272095},{"id":"https://openalex.org/C2777683733","wikidata":"https://www.wikidata.org/wiki/Q201456","display_name":"Directory","level":2,"score":0.42523202300071716},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s1351324916000164","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324916000164","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7400000095367432}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W61293283","https://openalex.org/W67077090","https://openalex.org/W77273554","https://openalex.org/W85946539","https://openalex.org/W134648021","https://openalex.org/W614252839","https://openalex.org/W1539361473","https://openalex.org/W1562514231","https://openalex.org/W1606604835","https://openalex.org/W1624239806","https://openalex.org/W1769949216","https://openalex.org/W1964348731","https://openalex.org/W1973152633","https://openalex.org/W1976381247","https://openalex.org/W1981053739","https://openalex.org/W1988012253","https://openalex.org/W2009077327","https://openalex.org/W2026306693","https://openalex.org/W2032958446","https://openalex.org/W2037796960","https://openalex.org/W2037978712","https://openalex.org/W2042071625","https://openalex.org/W2046456023","https://openalex.org/W2047221353","https://openalex.org/W2047959359","https://openalex.org/W2078323847","https://openalex.org/W2082359527","https://openalex.org/W2084277454","https://openalex.org/W2096765155","https://openalex.org/W2101010660","https://openalex.org/W2105673178","https://openalex.org/W2107695330","https://openalex.org/W2119821739","https://openalex.org/W2133837072","https://openalex.org/W2140903445","https://openalex.org/W2142756035","https://openalex.org/W2151521349","https://openalex.org/W2156985047","https://openalex.org/W2162657887","https://openalex.org/W2172706639","https://openalex.org/W2250924616","https://openalex.org/W2251533634","https://openalex.org/W2251569308","https://openalex.org/W2406799328","https://openalex.org/W2578576916","https://openalex.org/W2915731794","https://openalex.org/W3021520429","https://openalex.org/W4239510810","https://openalex.org/W4302313152","https://openalex.org/W6632442620","https://openalex.org/W6691611999"],"related_works":["https://openalex.org/W1934555896","https://openalex.org/W104148947","https://openalex.org/W2041335144","https://openalex.org/W2572315477","https://openalex.org/W2021344411","https://openalex.org/W4283836875","https://openalex.org/W3172141810","https://openalex.org/W3199233695","https://openalex.org/W2099421013","https://openalex.org/W1527781030"],"abstract_inverted_index":{"Abstract":[0],"Comparable":[1],"corpora":[2,22,211],"are":[3,23],"key":[4],"translation":[5],"resources":[6],"for":[7,19,34,90,110,162,208],"both":[8,143,218],"languages":[9,147],"and":[10,56,145,220],"domains":[11,186],"with":[12,95,205],"limited":[13],"linguistic":[14],"resources.":[15],"The":[16,101],"existing":[17,206],"approaches":[18,44,207],"building":[20,166,209],"comparable":[21,150,167,180,210],"mostly":[24],"based":[25],"on":[26,139],"ranking":[27,91,102],"candidate":[28,74,92],"documents":[29,76,94,114],"in":[30,70],"the":[31,65,71,129,134,140,163,171,196],"target":[32,75,93,146],"language":[33],"each":[35,68,98,108],"source":[36,99,144],"document":[37,50],"using":[38],"a":[39,85,116,149,155,177],"cross-lingual":[40],"retrieval":[41],"model.":[42],"These":[43],"also":[45,153],"exploit":[46],"other":[47],"evidence":[48,69,109],"of":[49,67,73,112,131,136,142,148,165,202,222],"similarity,":[51],"such":[52],"as":[53,115],"proper":[54],"names":[55],"publication":[57],"dates,":[58],"to":[59,87,97,157,175],"build":[60,176],"more":[61],"reliable":[62],"alignments.":[63,223],"However,":[64],"importance":[66],"scores":[72],"is":[77,104,120],"determined":[78],"heuristically.":[79],"In":[80],"this":[81],"paper,":[82],"we":[83],"employ":[84],"learning":[86],"rank":[88],"method":[89,156],"respect":[96],"document.":[100],"model":[103],"constructed":[105],"by":[106],"defining":[107],"similarity":[111],"bilingual":[113],"feature":[117,124],"whose":[118],"weight":[119],"learned":[121],"automatically.":[122],"Learning":[123],"weights":[125],"can":[126],"significantly":[127],"improve":[128],"quality":[130,219],"alignments,":[132],"because":[133],"reliability":[135],"features":[137],"depends":[138],"characteristics":[141],"corpus.":[151],"We":[152,169],"propose":[154],"generate":[158],"appropriate":[159],"training":[160],"data":[161],"task":[164],"corpora.":[168],"employed":[170],"proposed":[172],"learning-based":[173,215],"approach":[174,216],"multi-domain":[178],"English\u2013Persian":[179],"corpus":[181],"which":[182],"covers":[183],"twelve":[184],"different":[185],"obtained":[187],"from":[188],"Open":[189],"Directory":[190],"Project.":[191],"Experimental":[192],"results":[193],"show":[194],"that":[195,213],"created":[197],"alignments":[198],"have":[199],"high":[200],"degrees":[201],"comparability.":[203],"Comparison":[204],"shows":[212],"our":[214],"improves":[217],"coverage":[221]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
