{"id":"https://openalex.org/W4403791258","doi":"https://doi.org/10.1145/3664647.3681655","title":"RDLNet: A Novel and Accurate Real-world Document Localization Method","display_name":"RDLNet: A Novel and Accurate Real-world Document Localization Method","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791258","doi":"https://doi.org/10.1145/3664647.3681655"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681655","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681655","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000446732","display_name":"Yaqiang Wu","orcid":"https://orcid.org/0000-0001-8830-8250"},"institutions":[{"id":"https://openalex.org/I4210156165","display_name":"Lenovo (China)","ror":"https://ror.org/04srd9d93","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210156165"]},{"id":"https://openalex.org/I63371133","display_name":"Chongqing Jiaotong University","ror":"https://ror.org/01t001k65","country_code":"CN","type":"education","lineage":["https://openalex.org/I63371133"]},{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yaqiang Wu","raw_affiliation_strings":["Chongqing Jiaotong University, Chongqing, China","Xi'an Jiaotong University &amp; Lenovo Research, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Chongqing Jiaotong University, Chongqing, China","institution_ids":["https://openalex.org/I63371133"]},{"raw_affiliation_string":"Xi'an Jiaotong University &amp; Lenovo Research, Xi'an, China","institution_ids":["https://openalex.org/I87445476","https://openalex.org/I4210156165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108698836","display_name":"Zhen Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156165","display_name":"Lenovo (China)","ror":"https://ror.org/04srd9d93","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210156165"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen Xu","raw_affiliation_strings":["Lenovo Research, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Lenovo Research, Beijing, China","institution_ids":["https://openalex.org/I4210156165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101718028","display_name":"Yong Duan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156165","display_name":"Lenovo (China)","ror":"https://ror.org/04srd9d93","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210156165"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Duan","raw_affiliation_strings":["Lenovo Research, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Lenovo Research, Beijing, China","institution_ids":["https://openalex.org/I4210156165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102586780","display_name":"Yanlai Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156165","display_name":"Lenovo (China)","ror":"https://ror.org/04srd9d93","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210156165"]},{"id":"https://openalex.org/I63371133","display_name":"Chongqing Jiaotong University","ror":"https://ror.org/01t001k65","country_code":"CN","type":"education","lineage":["https://openalex.org/I63371133"]},{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanlai Wu","raw_affiliation_strings":["Chongqing Jiaotong University, Chongqing, China","Xi'an Jiaotong University &amp; Lenovo Research, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Chongqing Jiaotong University, Chongqing, China","institution_ids":["https://openalex.org/I63371133"]},{"raw_affiliation_string":"Xi'an Jiaotong University &amp; Lenovo Research, Xi'an, China","institution_ids":["https://openalex.org/I87445476","https://openalex.org/I4210156165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041083459","display_name":"Qinghua Zheng","orcid":"https://orcid.org/0000-0002-8436-4754"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinghua Zheng","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024453724","display_name":"Hui Li","orcid":"https://orcid.org/0000-0001-8310-7169"},"institutions":[{"id":"https://openalex.org/I4210156165","display_name":"Lenovo (China)","ror":"https://ror.org/04srd9d93","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210156165"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Li","raw_affiliation_strings":["Lenovo Research, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Lenovo Research, Beijing, China","institution_ids":["https://openalex.org/I4210156165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102396850","display_name":"Xiaoqiang Hu","orcid":"https://orcid.org/0000-0002-4310-7751"},"institutions":[{"id":"https://openalex.org/I4210156165","display_name":"Lenovo (China)","ror":"https://ror.org/04srd9d93","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210156165"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaochen Hu","raw_affiliation_strings":["Lenovo Research, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Lenovo Research, Beijing, China","institution_ids":["https://openalex.org/I4210156165"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080674767","display_name":"Lianwen Jin","orcid":"https://orcid.org/0000-0002-5456-0957"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lianwen Jin","raw_affiliation_strings":["South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5000446732"],"corresponding_institution_ids":["https://openalex.org/I4210156165","https://openalex.org/I63371133","https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":0.7479,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.73028279,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"9847","last_page":"9855"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9772999882698059,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6747480630874634},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38802680373191833},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3745397925376892}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6747480630874634},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38802680373191833},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3745397925376892}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681655","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681655","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W845365781","https://openalex.org/W1976948919","https://openalex.org/W1995220371","https://openalex.org/W2146208579","https://openalex.org/W2160072137","https://openalex.org/W2340624627","https://openalex.org/W2578797046","https://openalex.org/W2786749633","https://openalex.org/W2963150697","https://openalex.org/W2963351448","https://openalex.org/W2969005405","https://openalex.org/W2979350161","https://openalex.org/W3014177954","https://openalex.org/W3036826494","https://openalex.org/W3096609285","https://openalex.org/W3100423079","https://openalex.org/W3100763827","https://openalex.org/W3105154963","https://openalex.org/W3107331169","https://openalex.org/W3132226829","https://openalex.org/W3177126114","https://openalex.org/W4386071792","https://openalex.org/W4390874575","https://openalex.org/W4392449449"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0,102],"increasing":[1],"use":[2],"of":[3,105,125,153,162,172,187,202],"smartphones":[4],"for":[5,16,95,178],"capturing":[6],"documents":[7,98,164],"in":[8,23,99,200],"various":[9],"real-world":[10,74,179],"conditions":[11,36],"has":[12],"underscored":[13],"the":[14,81,100,118,121,128,133,136,139,149,170,185,191],"need":[15],"robust":[17],"document":[18,29,75,129,180],"localization":[19,76,130,181],"technologies.":[20],"Current":[21],"challenges":[22],"this":[24],"domain":[25],"include":[26],"handling":[27],"diverse":[28],"types,":[30],"complex":[31,53,63,82],"backgrounds,":[32],"and":[33,41,55,86,110,142,160,205],"varying":[34],"photographic":[35],"such":[37],"as":[38],"low":[39],"contrast":[40],"occlusion.":[42],"However,":[43],"there":[44],"currently":[45],"are":[46],"no":[47],"publicly":[48],"available":[49],"datasets":[50],"containing":[51],"these":[52,62,67],"scenarios":[54,83],"few":[56],"methods":[57],"demonstrate":[58],"their":[59],"capabilities":[60],"on":[61,182],"scenes.":[64],"To":[65],"address":[66],"issues,":[68],"we":[69],"create":[70],"a":[71,88,111],"new":[72],"comprehensive":[73],"benchmark":[77],"dataset":[78],"which":[79,188],"contains":[80],"mentioned":[84],"above":[85],"propose":[87],"novel":[89],"Real-world":[90],"Document":[91],"Localization":[92],"Network":[93],"(RDLNet)":[94],"locating":[96],"targeted":[97],"wild.":[101],"RDLNet":[103,119,137,173,192],"consists":[104],"an":[106],"innovative":[107],"light-SAM":[108,116],"encoder":[109],"masked":[112,140],"attention":[113,141],"decoder.":[114],"Utilizing":[115],"encoder,":[117],"transfers":[120],"mighty":[122],"generalization":[123],"capability":[124],"SAM":[126],"to":[127,146],"task.":[131],"In":[132],"decoding":[134],"stage,":[135],"exploits":[138],"object":[143],"query":[144],"method":[145],"efficiently":[147],"output":[148],"triple-branch":[150],"predictions":[151],"consisting":[152],"corner":[154],"point":[155],"coordinates,":[156],"instance-level":[157],"segmentation":[158],"area":[159],"categories":[161],"different":[163],"without":[165],"extra":[166],"post-processing.":[167],"We":[168],"compare":[169],"performance":[171],"with":[174],"other":[175],"state-of-the-art":[176],"approaches":[177],"multiple":[183],"benchmarks,":[184],"results":[186],"reveal":[189],"that":[190],"remarkably":[193],"outperforms":[194],"contemporary":[195],"methods,":[196],"demonstrating":[197],"its":[198],"superiority":[199],"terms":[201],"both":[203],"accuracy":[204],"practicability.":[206]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
