{"id":"https://openalex.org/W4284704757","doi":"https://doi.org/10.1145/3477495.3532062","title":"Structure and Semantics Preserving Document Representations","display_name":"Structure and Semantics Preserving Document Representations","publication_year":2022,"publication_date":"2022-07-06","ids":{"openalex":"https://openalex.org/W4284704757","doi":"https://doi.org/10.1145/3477495.3532062"},"language":"en","primary_location":{"id":"doi:10.1145/3477495.3532062","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3477495.3532062","pdf_url":null,"source":{"id":"https://openalex.org/S4363608773","display_name":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065006343","display_name":"Natraj Raman","orcid":"https://orcid.org/0009-0008-8866-1482"},"institutions":[{"id":"https://openalex.org/I4210125307","display_name":"Morgan Stanley (United Kingdom)","ror":"https://ror.org/03csd5507","country_code":"GB","type":"company","lineage":["https://openalex.org/I2802755631","https://openalex.org/I4210125307"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Natraj Raman","raw_affiliation_strings":["J.P. Morgan AI Research, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"J.P. Morgan AI Research, London, United Kingdom","institution_ids":["https://openalex.org/I4210125307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087647155","display_name":"Sameena Shah","orcid":"https://orcid.org/0009-0000-5960-5811"},"institutions":[{"id":"https://openalex.org/I2802755631","display_name":"Morgan Stanley (United States)","ror":"https://ror.org/00aphdz18","country_code":"US","type":"company","lineage":["https://openalex.org/I2802755631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sameena Shah","raw_affiliation_strings":["J.P. Morgan AI Research, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"J.P. Morgan AI Research, New York, NY, USA","institution_ids":["https://openalex.org/I2802755631"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088276691","display_name":"Manuela Veloso","orcid":"https://orcid.org/0000-0001-6738-238X"},"institutions":[{"id":"https://openalex.org/I2802755631","display_name":"Morgan Stanley (United States)","ror":"https://ror.org/00aphdz18","country_code":"US","type":"company","lineage":["https://openalex.org/I2802755631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Manuela Veloso","raw_affiliation_strings":["J.P. Morgan AI Research, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"J.P. Morgan AI Research, New York, NY, USA","institution_ids":["https://openalex.org/I2802755631"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5065006343"],"corresponding_institution_ids":["https://openalex.org/I4210125307"],"apc_list":null,"apc_paid":null,"fwci":0.4909,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.57977883,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"780","last_page":"790"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8325506448745728},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6829137206077576},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.6409881711006165},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.614339292049408},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5257238745689392},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5185037851333618},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.49912142753601074},{"id":"https://openalex.org/keywords/document-retrieval","display_name":"Document retrieval","score":0.45370596647262573},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45326727628707886},{"id":"https://openalex.org/keywords/document-structure-description","display_name":"Document Structure Description","score":0.44334033131599426},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4167178273200989},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38651740550994873},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.14058762788772583},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.12714141607284546},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.10245007276535034}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8325506448745728},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6829137206077576},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.6409881711006165},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.614339292049408},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5257238745689392},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5185037851333618},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.49912142753601074},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.45370596647262573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45326727628707886},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.44334033131599426},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4167178273200989},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38651740550994873},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.14058762788772583},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.12714141607284546},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.10245007276535034},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3477495.3532062","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3477495.3532062","pdf_url":null,"source":{"id":"https://openalex.org/S4363608773","display_name":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47999998927116394,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1997423738","https://openalex.org/W2024833814","https://openalex.org/W2187089797","https://openalex.org/W2275624892","https://openalex.org/W2470394683","https://openalex.org/W2606377603","https://openalex.org/W2787905871","https://openalex.org/W2894176037","https://openalex.org/W2895347732","https://openalex.org/W2922386288","https://openalex.org/W2949024437","https://openalex.org/W2962985038","https://openalex.org/W2963026686","https://openalex.org/W2963157208","https://openalex.org/W2963744743","https://openalex.org/W2963775347","https://openalex.org/W2964271799","https://openalex.org/W2969656782","https://openalex.org/W2970641574","https://openalex.org/W2971865858","https://openalex.org/W2997403743","https://openalex.org/W3024272706","https://openalex.org/W3034303554","https://openalex.org/W3035324702","https://openalex.org/W3101622805","https://openalex.org/W3102554291","https://openalex.org/W3105310509","https://openalex.org/W3108373531","https://openalex.org/W3175593095","https://openalex.org/W4252076394","https://openalex.org/W4391156274"],"related_works":["https://openalex.org/W2375480909","https://openalex.org/W2353314428","https://openalex.org/W2012019886","https://openalex.org/W2166090428","https://openalex.org/W2381021552","https://openalex.org/W2354749003","https://openalex.org/W2027019938","https://openalex.org/W2079058854","https://openalex.org/W1518053583","https://openalex.org/W343133241"],"abstract_inverted_index":{"Retrieving":[0],"relevant":[1,108],"documents":[2,26,129],"from":[3],"a":[4,59,95],"corpus":[5],"is":[6,142],"typically":[7],"based":[8],"on":[9,159],"the":[10,14,29,52,79,84,120,124,128,135],"semantic":[11,34],"similarity":[12],"between":[13,25,127],"document":[15,64,91,103,163],"content":[16,69],"and":[17,48,93,112,145],"query":[18,148],"text.":[19],"The":[20,140],"inclusion":[21],"of":[22,51],"structural":[23],"relationship":[24,85,138],"can":[27],"benefit":[28],"retrieval":[30,164],"mechanism":[31],"by":[32,66],"addressing":[33],"gaps.":[35],"However,":[36],"incorporating":[37],"these":[38],"relationships":[39],"requires":[40],"tractable":[41],"mechanisms":[42],"that":[43,100,105,154],"balance":[44],"structure":[45,82],"with":[46,70],"semantics":[47],"take":[49],"advantage":[50],"prevalent":[53],"pre-train/fine-tune":[54],"paradigm.":[55],"We":[56,152],"propose":[57],"here":[58],"holistic":[60],"approach":[61],"to":[62,87,109,115,133],"learning":[63,76],"representations":[65],"integrating":[67],"intra-document":[68],"inter-document":[71],"relations.":[72],"Our":[73],"deep":[74],"metric":[75],"solution":[77],"analyzes":[78],"complex":[80],"neighborhood":[81],"in":[83,119,137],"network":[86],"efficiently":[88],"sample":[89],"similar/dissimilar":[90],"pairs":[92,104],"defines":[94],"novel":[96],"quintuplet":[97],"loss":[98],"function":[99],"simultaneously":[101],"encourages":[102],"are":[106,130],"semantically":[107],"be":[110,116],"closer":[111],"structurally":[113],"unrelated":[114],"far":[117],"apart":[118],"representation":[121],"space.":[122],"Furthermore,":[123],"separation":[125],"margins":[126],"varied":[131],"flexibly":[132],"encode":[134],"heterogeneity":[136],"strengths.":[139],"model":[141],"fully":[143],"fine-tunable":[144],"natively":[146],"supports":[147],"projection":[149],"during":[150],"inference.":[151],"demonstrate":[153],"it":[155],"outperforms":[156],"competing":[157],"methods":[158],"multiple":[160],"datasets":[161],"for":[162],"tasks.":[165]},"counts_by_year":[{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
