% Technical report on the multi-layer XML annotation of the MITRE CBC4Kids
% corpus and a baseline question answering system built on top of it.
% The issuing body is stored in `institution` (the field report-type entries
% require), and all author names use the unambiguous "Last, First" form.
@techreport{Dalmas-etal:2004,
  author      = {Dalmas, Tiphaine and Leidner, Jochen L. and Webber, Bonnie and Grover, Claire and Bos, Johan},
  title       = {Annotating {CBC4Kids}: A Corpus for Reading Comprehension and Question Answering Evaluation},
  institution = {University of Edinburgh, School of Informatics},
  number      = {{EDI--INF--RR--0204}},
  date        = {2004},
  url         = {http://www.inf.ed.ac.uk/publications/report/0204.html},
  keywords    = {Computer corpora; linguistic XML annotation; natural language question answering (NLQA; Q&A); news stories for children; natural language understanding (NLU)},
  abstract    = {Reading comprehension tests are receiving increased attention within the NLP community as a controlled test-bed for developing, evaluating and comparing robust question answering (NLQA) methods. To support this, we have enriched the MITRE CBC4Kids corpus with multiple XML annotation layers recording the output of various tokenizers, lemmatizers, a stemmer, a semantic tagger, POS taggers and syntactic parsers. To demonstrate its use, we have built a baseline NLQA system for word-overlap based answer retrieval, NLQA evaluation and corpus browsing.},
}