<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href='static/style.xsl' type='text/xsl'?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-05-21T18:32:18Z</responseDate><request verb="GetRecord" identifier="oai:clarin.vdu.lt:20.500.11821/72" metadataPrefix="oai_dc">https://clarin.vdu.lt/oai/request</request><GetRecord><record><header><identifier>oai:clarin.vdu.lt:20.500.11821/72</identifier><datestamp>2025-10-22T17:31:57Z</datestamp><setSpec>hdl_20.500.11821_6</setSpec><setSpec>hdl_20.500.11821_7</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>English-Lithuanian Parallel Migration Corpus</dc:title>
<dc:creator>Usinskiene, Olga</dc:creator>
<dc:creator>Rackevičienė, Sigita</dc:creator>
<dc:subject>parallel corpus</dc:subject>
<dc:subject>specialized corpus</dc:subject>
<dc:subject>migration corpus</dc:subject>
<dc:subject>migration</dc:subject>
<dc:description>English-Lithuanian Parallel Migration Corpus includes original English texts and their Lithuanian translations, aligned at the sentence level. The texts are drawn from EU legal acts and other migration-related documents published in the EUR-Lex database between 1998 and 2024.&#xd;
&#xd;
The total size of the corpus is 1,223,350 words (EN - 688,410 words; LT - 534,940 words). The corpus contains 43,345 aligned segments (sentences).&#xd;
&#xd;
Within the dataset, the following files are included:&#xd;
1) EN-LT_Parallel_Migration_Corpus_TMX.zip &#xd;
   This file is composed of 51 files in TMX (translation memory exchange) format: &#xd;
   - 50 separate EN-LT TMX files with aligned texts&#xd;
   - 1 combined file consolidating all 50 EN-LT TMX files&#xd;
2) EN-LT_Parallel_Migration_Corpus_VERT.zip&#xd;
   This file is composed of 102 files in VERT (vertical text) format:&#xd;
   - 50 separate EN files with morphological annotation&#xd;
   - 1 combined EN file consolidating all 50 EN VERT files&#xd;
   - 50 separate LT files with morphological annotation&#xd;
   - 1 combined LT file consolidating all 50 LT VERT files&#xd;
   Sentence alignment: &#xd;
   Each &lt;align> block corresponds to a TMX translation unit &lt;tu>.&#xd;
   Morphological annotation structure:&#xd;
   EN: wordform | tag | lempos (EN TreeTagger)&#xd;
   LT: wordform | lempos | tag (LT MULTEXT-East)&#xd;
   Tagset references:&#xd;
   https://www.sketchengine.eu/english-treetagger-pipeline-2/&#xd;
   https://www.sketchengine.eu/lithuanian-multext-east-part-of-speech-tagset/&#xd;
3) EN-LT_Parallel_Migration_Corpus_TXT.zip&#xd;
   This file is composed of 100 files in TXT (plain text) format: &#xd;
   - 50 separate EN files&#xd;
   - 50 separate LT files&#xd;
4) EN-LT_Parallel_Migration_Corpus_CSV(Metadata).zip&#xd;
   This file is composed of 2 files with metadata in CSV (comma separated values) format: &#xd;
   - 1 EN file with metadata&#xd;
   - 1 LT file with metadata&#xd;
   Metadata categories: Form of document, File name (CELEX number of document), Title of document, Author of document (Institution), Year of Publication, Word count, URL.&#xd;
The dataset comprises a total of 255 files, all encoded in UTF-8.</dc:description>
<dc:date>2025-10-15</dc:date>
<dc:type>corpus</dc:type>
<dc:identifier>http://hdl.handle.net/20.500.11821/72</dc:identifier>
<dc:language>eng</dc:language>
<dc:language>lit</dc:language>
<dc:rights>PUB_CLARIN-LT_End-User-Licence-Agreement_EN-LT</dc:rights>
<dc:rights>https://clarin.vdu.lt/licenses/eula/PUB_CLARIN-LT_End-User-Licence-Agreement_EN-LT.htm</dc:rights>
<dc:rights>PUB</dc:rights>
<dc:format>application/pdf</dc:format>
<dc:format>application/pdf</dc:format>
<dc:format>application/zip</dc:format>
<dc:format>application/zip</dc:format>
<dc:format>application/zip</dc:format>
<dc:format>application/zip</dc:format>
<dc:format>text/plain; charset=utf-8</dc:format>
<dc:format>downloadable_files_count: 6</dc:format>
<dc:publisher>Mykolas Romeris University</dc:publisher>
</oai_dc:dc>
</metadata></record></GetRecord></OAI-PMH>