@article{Kamphuis-2023-MMEAD:,
title = "MMEAD: MS MARCO Entity Annotations and Disambiguations",
author = "Kamphuis, Chris and
Lin, Aileen and
Yang, S. A. and
Lin, Jimmy and
Vries, Arjen P. de and
Hasibi, Faegheh",
journal = "Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval",
year = "2023",
publisher = "ACM",
url = "https://gwf-uwaterloo.github.io/gwf-publications/G23-20001",
doi = "10.1145/3539618.3591887",
abstract = "MMEAD, or MS MARCO Entity Annotations and Disambiguations, is a resource for entity links for the MS MARCO datasets. We specify a format to store and share links for both document and passage collections of MS MARCO. Following this specification, we release entity links to Wikipedia for documents and passages in both MS MARCO collections (v1 and v2). Entity links have been produced by the REL and BLINK systems. MMEAD is an easy-to-install Python package, allowing users to load the link data and entity embeddings effortlessly. Using MMEAD takes only a few lines of code. Finally, we show how MMEAD can be used for IR research that uses entity information. We show how to improve recall@1000 and MRR@10 on more complex queries on the MS MARCO v1 passage dataset by using this resource. We also demonstrate how entity expansions can be used for interactive search applications.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="Kamphuis-2023-MMEAD:">
<titleInfo>
<title>MMEAD: MS MARCO Entity Annotations and Disambiguations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Kamphuis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aileen</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">S</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jimmy</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arjen</namePart>
<namePart type="given">P</namePart>
<namePart type="given">de</namePart>
<namePart type="family">Vries</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Faegheh</namePart>
<namePart type="family">Hasibi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>ACM</publisher>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>MMEAD, or MS MARCO Entity Annotations and Disambiguations, is a resource for entity links for the MS MARCO datasets. We specify a format to store and share links for both document and passage collections of MS MARCO. Following this specification, we release entity links to Wikipedia for documents and passages in both MS MARCO collections (v1 and v2). Entity links have been produced by the REL and BLINK systems. MMEAD is an easy-to-install Python package, allowing users to load the link data and entity embeddings effortlessly. Using MMEAD takes only a few lines of code. Finally, we show how MMEAD can be used for IR research that uses entity information. We show how to improve recall@1000 and MRR@10 on more complex queries on the MS MARCO v1 passage dataset by using this resource. We also demonstrate how entity expansions can be used for interactive search applications.</abstract>
<identifier type="citekey">Kamphuis-2023-MMEAD:</identifier>
<identifier type="doi">10.1145/3539618.3591887</identifier>
<location>
<url>https://gwf-uwaterloo.github.io/gwf-publications/G23-20001</url>
</location>
<part>
<date>2023</date>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T MMEAD: MS MARCO Entity Annotations and Disambiguations
%A Kamphuis, Chris
%A Lin, Aileen
%A Yang, S. A.
%A Lin, Jimmy
%A Vries, Arjen P. de
%A Hasibi, Faegheh
%J Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval
%D 2023
%I ACM
%F Kamphuis-2023-MMEAD:
%X MMEAD, or MS MARCO Entity Annotations and Disambiguations, is a resource for entity links for the MS MARCO datasets. We specify a format to store and share links for both document and passage collections of MS MARCO. Following this specification, we release entity links to Wikipedia for documents and passages in both MS MARCO collections (v1 and v2). Entity links have been produced by the REL and BLINK systems. MMEAD is an easy-to-install Python package, allowing users to load the link data and entity embeddings effortlessly. Using MMEAD takes only a few lines of code. Finally, we show how MMEAD can be used for IR research that uses entity information. We show how to improve recall@1000 and MRR@10 on more complex queries on the MS MARCO v1 passage dataset by using this resource. We also demonstrate how entity expansions can be used for interactive search applications.
%R 10.1145/3539618.3591887
%U https://gwf-uwaterloo.github.io/gwf-publications/G23-20001
%U https://doi.org/10.1145/3539618.3591887
Markdown (Informal)
[MMEAD: MS MARCO Entity Annotations and Disambiguations](https://gwf-uwaterloo.github.io/gwf-publications/G23-20001) (Kamphuis et al., GWF 2023)
ACL
- Chris Kamphuis, Aileen Lin, S. A. Yang, Jimmy Lin, Arjen P. de Vries, and Faegheh Hasibi. 2023. MMEAD: MS MARCO Entity Annotations and Disambiguations. Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval.