-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathcc2013.bib
More file actions
66 lines (60 loc) · 4.2 KB
/
Copy pathcc2013.bib
File metadata and controls
66 lines (60 loc) · 4.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
% Edinburgh SLT and MT system description for the IWSLT 2013 evaluation.
% Title completed with the trailing "Evaluation" (matches the PDF file name in
% the url field); author spelling corrected to "Philipp Koehn".
@Article{cc:BirchDurraniKoehn:2013:Edinburgh-SLT-and-MT-system,
  author                = {Alexandra Birch and Nadir Durrani and Philipp Koehn},
  title                 = {Edinburgh {SLT} and {MT} System Description for the {IWSLT} 2013 Evaluation},
  year                  = {2013},
  url                   = {http://workshop2013.iwslt.org/downloads/Edinburgh_SLT_and_MT_System_Description_for_the_IWSLT_2013_Evaluation.pdf},
  cc-author-affiliation = {School of Informatics, University of Edinburgh},
}
% Smith et al., mining parallel text from the Common Crawl (ACL Anthology P13-1135).
% Fixes: "Philipp Koehn" spelling, page range written with "--", the
% mis-encoded apostrophe in "Amazon's", and the dollar sign escaped so the
% abstract is safe under styles that print it.
@Article{cc:SmithSaint-AmandPlamadaKoehnEtAl:2013:Dirt-cheap-web-scale-parallel-corpus,
  author                = {Jason R. Smith and Herve Saint-Amand and Magdalena Plamada and Philipp Koehn and
                           Chris Callison-Burch and Adam Lopez},
  title                 = {Dirt Cheap Web-Scale Parallel Text from the {Common Crawl}},
  year                  = {2013},
  pages                 = {1374--1383},
  publisher             = {Association for Computational Linguistics},
  url                   = {http://www.aclweb.org/anthology/P13-1135},
  abstract              = {Parallel text is the fuel that drives modern machine translation systems. The Web is a comprehensive
                           source of preexisting parallel text, but crawling the entire web is impossible for all but the largest
                           companies. We bring web-scale parallel text to the masses by mining the Common Crawl, a public Web
                           crawl hosted on Amazon's Elastic Cloud. Starting from nothing more than a set of common two-letter
                           language codes, our open-source extension of the STRAND algorithm mined 32 terabytes of the crawl in
                           just under a day, at a cost of about \$500. Our large-scale experiment uncovers large amounts of
                           parallel text in dozens of language pairs across a variety of domains and genres, some previously
                           unavailable in curated datasets. Even with minimal cleaning and filtering, the resulting data boosts
                           translation performance across the board for five different language pairs in the news domain, and on
                           open domain test sets we see improvements of up to 5 BLEU. We make our code and data available for
                           other researchers seeking to mine this rich new data resource.},
  cc-author-affiliation = {Johns Hopkins University, University of Edinburgh, University of Zurich, University of
                           Pennsylvania},
}
% Stymne et al., tunable distortion limits and corpus cleaning for SMT
% (the url field points at the WMT13 site).
% Author name restored to "Jörg Tiedemann"; stored as UTF-8, like the other
% accented names in this file.
@Article{cc:StymneHardmeierTiedemannNivre:2013:Tunable-distortion-limits,
  author                = {Sara Stymne and Christian Hardmeier and Jörg Tiedemann and Joakim Nivre},
  title                 = {Tunable Distortion Limits and Corpus Cleaning for {SMT}},
  year                  = {2013},
  url                   = {http://statmt.org/wmt13/pdf/WMT29.pdf},
  cc-author-affiliation = {Uppsala University: Department of Linguistics and Philology},
}
% Ha et al., the KIT translation systems submitted to IWSLT 2013.
@article{cc:HaHerrmannNiehuesMedianiEtAl:2013:KIT-translation-systems-for-IWSLT-2013,
  author                = {Thanh-Le Ha and Teresa Herrmann and Jan Niehues and Mohammed Mediani and
                           Eunah Cho and Yuqi Zhang and Isabel Slawik and Alex Waibel},
  title                 = {The {KIT} Translation Systems for {IWSLT} 2013},
  year                  = {2013},
  url                   = {http://workshop2013.iwslt.org/downloads/The_KIT_Translation_Systems_for_IWSLT_2013.pdf},
  cc-author-affiliation = {Institute for Anthropomatics},
}
% Drijfhout et al., "Traitor": associating concepts using the World Wide Web.
% "World Wide Web" is brace-protected so sentence-casing styles keep its capitals.
@Article{cc:DrijfhoutJundtWeversHiemstra:2013:Traitor,
  author                = {Wanno Drijfhout and Oliver Jundt and Lesley Wevers and Djoerd Hiemstra},
  title                 = {Traitor: Associating Concepts using the {World Wide Web}},
  year                  = {2013},
  url                   = {http://doc.utwente.nl/88328/},
  cc-author-affiliation = {University of Twente},
}
% Bizer et al., quantitative analysis of RDFa, Microdata and Microformats deployment.
% Fixes: "Mannheim" spelling in the affiliation, whole-word brace protection for
% RDFa, and the title dash written as "--". The citation key is left unchanged
% (including its non-ASCII character) so existing citations keep resolving.
@Article{cc:BizerEckertMeuselMühleisenEtAl:2013:RDFa-microdata-microformats,
  author                = {Christian Bizer and Kai Eckert and Robert Meusel and Hannes Mühleisen and
                           Michael Schuhmacher and Johanna Völker},
  title                 = {Deployment of {RDFa}, Microdata, and Microformats on the Web -- {A} Quantitative Analysis},
  year                  = {2013},
  url                   = {http://hannes.muehleisen.org/Bizer-etal-DeploymentRDFaMicrodataMicroformats-ISWC-InUse-2013.pdf},
  cc-author-affiliation = {Data and Web Science Group – University of Mannheim, Database Architectures Group, Centrum
                           Wiskunde & Informatica, Netherlands},
}