{"id":"https://openalex.org/W2043842914","doi":"https://doi.org/10.1002/meet.14503901107","title":"Language\u2010based retrieval of Web documents: An analysis of the Arabic\u2010recognition capabilities of two major search engines","display_name":"Language\u2010based retrieval of Web documents: An analysis of the Arabic\u2010recognition capabilities of two major search engines","publication_year":2002,"publication_date":"2002-11-01","ids":{"openalex":"https://openalex.org/W2043842914","doi":"https://doi.org/10.1002/meet.14503901107","mag":"2043842914"},"language":"en","primary_location":{"id":"doi:10.1002/meet.14503901107","is_oa":true,"landing_page_url":"https://doi.org/10.1002/meet.14503901107","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1002/meet.14503901107","source":{"id":"https://openalex.org/S4306523999","display_name":"Proceedings of the American Society for Information Science and Technology","issn_l":"1550-8390","issn":["1550-8390","1936-1734"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the American Society for Information Science and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1002/meet.14503901107","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036544297","display_name":"Haidar Moukdad","orcid":null},"institutions":[{"id":"https://openalex.org/I129902397","display_name":"Dalhousie University","ror":"https://ror.org/01e6qks80","country_code":"CA","type":"education","lineage":["https://openalex.org/I129902397"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Haidar Moukdad","raw_affiliation_strings":["Dalhousie University, Halifax NS, Canada","Dalhousie University Halifax NS Canada"],"affiliations":[{"raw_affiliation_string":"Dalhousie University, Halifax NS, Canada","institution_ids":["https://openalex.org/I129902397"]},{"raw_affiliation_string":"Dalhousie University Halifax NS Canada","institution_ids":["https://openalex.org/I129902397"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5036544297"],"corresponding_institution_ids":["https://openalex.org/I129902397"],"apc_list":null,"apc_paid":null,"fwci":0.4284,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.68482222,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"39","issue":"1","first_page":"551","last_page":"551"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9377999901771545,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12151","display_name":"Interpreting and Communication in Healthcare","score":0.9287999868392944,"subfield":{"id":"https://openalex.org/subfields/3600","display_name":"General Health Professions"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8353471755981445},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.7285957932472229},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6176570057868958},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5634600520133972},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.5433670878410339},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5307343602180481},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4992806911468506},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.444147527217865},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4285871982574463},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.4173218011856079},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4143972396850586},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3517380356788635},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2142113447189331},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.19401130080223083}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8353471755981445},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.7285957932472229},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6176570057868958},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5634600520133972},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.5433670878410339},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5307343602180481},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4992806911468506},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.444147527217865},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4285871982574463},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.4173218011856079},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4143972396850586},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3517380356788635},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2142113447189331},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.19401130080223083},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1002/meet.14503901107","is_oa":true,"landing_page_url":"https://doi.org/10.1002/meet.14503901107","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1002/meet.14503901107","source":{"id":"https://openalex.org/S4306523999","display_name":"Proceedings of the American Society for Information Science and Technology","issn_l":"1550-8390","issn":["1550-8390","1936-1734"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the American Society for Information Science and Technology","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1002/meet.14503901107","is_oa":true,"landing_page_url":"https://doi.org/10.1002/meet.14503901107","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1002/meet.14503901107","source":{"id":"https://openalex.org/S4306523999","display_name":"Proceedings of the American Society for Information Science and Technology","issn_l":"1550-8390","issn":["1550-8390","1936-1734"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the American Society for Information Science and Technology","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8100000023841858,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2043842914.pdf","grobid_xml":"https://content.openalex.org/works/W2043842914.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4282568653","https://openalex.org/W2119380317","https://openalex.org/W2082868123","https://openalex.org/W2070155952","https://openalex.org/W226586525","https://openalex.org/W2362145681","https://openalex.org/W2913520953","https://openalex.org/W1538826769","https://openalex.org/W4310801723","https://openalex.org/W2383292628"],"abstract_inverted_index":{"Many":[0],"search":[1,37,65,88,161,238,249,274,282,290,305,311,385,429,476,511],"engines":[2,89,162,193,239,306,413,477],"on":[3,138,493,524,534],"the":[4,9,31,64,91,98,114,131,139,152,156,192,195,216,223,246,255,325,331,337,346,372,379,384,395,407,428,432,441,448,451,468,494,499,520],"Web":[5,140,281],"offer":[6],"their":[7,34,200],"users":[8,278,309],"option":[10],"to":[11,14,59,103,203,242,261,279,310,319,335,339,349,370,505,518],"restrict":[12],"searches":[13,301,316,332],"a":[15,78,94,109,142,169,187,391,437,485,502,528],"specific":[16],"language":[17,32,69,106,122,157,442,465],"(a":[18,364],"search-by-language":[19],"feature),":[20],"and":[21,70,111,126,151,174,184,186,198,214,220,297,410,531],"therefore":[22],"only":[23],"retrieve":[24,60],"HTML":[25],"documents":[26,159,320,348,400],"that":[27,144,229,307,358,388,401,411,420],"contain":[28,361],"text":[29,72,363],"in":[30,42,50,52,67,73,77,87,108,130,147,222,286,302,321,344],"of":[33,80,84,93,116,135,155,158,178,190,211,218,232,248,254,257,265,272,330,383,397,406,434,436,443,450,453,461,464,470,475,484,488,501],"choice.":[35],"This":[36,226,496],"feature":[38],"can":[39,163],"be":[40,166,368],"indispensable":[41],"such":[43],"situations":[44],"when":[45,56],"searchers":[46,57],"are":[47],"not":[48,360],"interested":[49],"material":[51],"other":[53,127,205,455],"languages,":[54],"or":[55,76,460],"want":[58],"bilingual/multilingual":[61],"documents-documents":[62],"containing":[63,351],"term(s)":[66],"one":[68,182,280],"additional":[71],"another":[74],"language,":[75],"multitude":[79],"languages.":[81],"The":[82,133,176,315,328,472,514],"implementation":[83,210],"language-recognition":[85,136,473,507],"capabilities":[86,474,508],"necessitates":[90],"development":[92,134],"mechanism":[95],"through":[96],"which":[97,340],"indexing":[99],"software":[100],"is":[101,141,145,517],"able":[102],"recognize":[104],"identifiable":[105],"properties":[107,120],"document":[110,357,439],"consequently":[112],"indicate":[113,371],"language(s)":[115],"this":[117,444,525],"document.":[118,132,445],"These":[119],"include":[121],"tags,":[123],"character":[124],"encoding,":[125],"language-identifying":[128],"characteristics":[129],"mechanisms":[137],"process":[143,197],"still":[146],"its":[148],"infancy":[149],"stage,":[150],"successful":[153,209],"identification":[154,196],"by":[160,236,267,277,324,378,404,467],"at":[164],"best":[165],"described":[167],"as":[168,299,458],"procedure":[170],"fraught":[171],"with":[172,245,440],"inconsistencies":[173],"errors.":[175],"degree":[177,338],"success":[179,266],"varies":[180],"between":[181,394],"engine":[183,283,342,512],"another,":[185],"better":[188],"understanding":[189],"how":[191,199],"handle":[194],"results":[201,329,386],"compare":[202],"each":[204,341,355,405],"will":[206,367,478],"facilitate":[207],"more":[208,529],"language-based":[212],"searching,":[213],"identify":[215],"points":[217],"strengths":[219],"weaknesses":[221],"existing":[224],"engines.":[225,327],"poster":[227,497,526],"theorizes":[228],"hasty":[230],"implementations":[231],"language-":[233],"recognition":[234],"features":[235,259],"some":[237],"have":[240,421],"led":[241],"serious":[243],"problems":[244],"accuracy":[247],"results,":[250],"while":[251],"informed":[252],"acknowledgement":[253],"limitations":[256],"these":[258,295],"contributed":[260],"an":[262,481],"acceptable":[263],"level":[264],"others.":[268],"A":[269],"random":[270],"sample":[271],"English-language":[273],"queries":[275,296],"submitted":[276],"was":[284,376,390],"gathered":[285],"early":[287],"2002.":[288],"Unique":[289],"terms":[291],"were":[292,317,333,402,447],"extracted":[293],"from":[294,427],"entered":[298],"individual":[300],"two":[303,326,408],"major":[304],"allow":[308],"for":[312],"Arabic":[313,352,362,457],"documents.":[314,471],"limited":[318],"Arabic\u2013as":[322],"provided":[323],"analyzed":[334,369,423],"ascertain":[336],"succeeded":[343],"limiting":[345],"retrieved":[347,356,377,403,438],"those":[350],"text.":[353],"Also,":[354],"did":[359],"false":[365,415,418],"hit)":[366],"reason":[373],"why":[374],"it":[375],"engine(s).":[380],"Preliminary":[381],"analyses":[382],"showed":[387],"there":[389],"big":[392],"difference":[393],"numbers":[396],"correctly":[398],"identified":[399],"engines,":[409],"both":[412],"produced":[414],"hits.":[416],"Some":[417],"hits":[419],"been":[422],"so":[424],"far":[425],"resulted":[426],"engine's":[430],"confusing":[431],"country":[433],"origin":[435],"Others":[446],"result":[449],"misidentification":[452],"languages":[454],"than":[456],"Arabic,":[459],"incorrect":[462],"use":[463],"tags":[466],"creators":[469],"undoubtedly":[479],"become":[480],"integral":[482],"part":[483],"growing":[486],"area":[487],"research\u2013cross-language":[489],"information":[490],"retrieval":[491],"(IR)":[492],"Web.":[495],"highlights":[498],"importance":[500],"standard":[503],"approach":[504],"implementing":[506],"across":[509],"different":[510],"platforms.":[513],"future":[515],"plan":[516],"develop":[519],"exploratory":[521],"research":[522],"presented":[523],"into":[527],"focused":[530],"far-reaching":[532],"work":[533],"cross-language":[535],"IR.":[536]},"counts_by_year":[{"year":2012,"cited_by_count":1}],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
