Add IYG parsing

This commit is contained in:
Helge Rausch
2014-08-30 15:04:39 +02:00
parent 0e143e1fec
commit dbc04a7b6f
3 changed files with 73 additions and 1 deletions

View File

@@ -55,7 +55,7 @@ class Parsing
rows = File.readlines(genotype.genotype.path)
.reject { |line| line.start_with?('#') } # Skip comments
stats[:rows_without_comments] = rows.length
csv = send(:"parse_#{genotype.filetype.sub('-', '_')}", rows)
csv = send(:"parse_#{genotype.filetype.sub('-', '_').downcase}", rows)
known_chromosomes = ['MT', 'X', 'Y', (1..22).map(&:to_s)].flatten
csv.select! do |row|
# snp name
@@ -180,6 +180,37 @@ class Parsing
end
end
# Converts the lines of an IYG genotype file into rows for further processing.
#
# Every line is expected to hold a SNP name and a genotype separated by a
# tab. Names starting with "MT" are placed on chromosome "MT" and use the
# first run of digits in the name as position; the ones listed in the lookup
# table below are additionally renamed to their rs identifier. For all other
# SNPs no location is read from the line, so chromosome and position are both
# set to the placeholder "1".
#
# Lines that do not contain a tab-separated genotype column (blank lines, for
# example) are skipped instead of raising NoMethodError.
#
# rows - Array of Strings, the raw lines of the file.
#
# Returns an Array with one
# [genotype id, SNP name, chromosome, position, local genotype] Array per
# parsed line.
def parse_iyg(rows)
  # IYG-specific mitochondrial marker names mapped to rs identifiers.
  db_snp_names = {
    "MT-T3027C" => "rs199838004", "MT-T4336C" => "rs41456348",
    "MT-G4580A" => "rs28357975", "MT-T5004C" => "rs41419549",
    "MT-C5178a" => "rs28357984", "MT-A5390G" => "rs41333444",
    "MT-C6371T" => "rs41366755", "MT-G8697A" => "rs28358886",
    "MT-G9477A" => "rs2853825", "MT-G10310A" => "rs41467651",
    "MT-A10550G" => "rs28358280", "MT-C10873T" => "rs2857284",
    "MT-C11332T" => "rs55714831", "MT-A11947G" => "rs28359168",
    "MT-A12308G" => "rs2853498", "MT-A12612G" => "rs28359172",
    "MT-T14318C" => "rs28357675", "MT-T14766C" => "rs3135031",
    "MT-T14783C" => "rs28357680"
  }.freeze

  rows.each_with_object([]) do |row, parsed|
    snp_name, local_genotype = row.split("\t")
    # Without both columns there is nothing to store for this line.
    next if snp_name.nil? || local_genotype.nil?

    if snp_name.start_with?('MT')
      chromosome = 'MT'
      position = snp_name[/[0-9]+/]
    else
      # Separate literals so the two fields never share one String object.
      chromosome = '1'
      position = '1'
    end

    parsed << [
      genotype.id,
      db_snp_names.fetch(snp_name, snp_name),
      chromosome,
      position,
      local_genotype.strip
    ]
  end
end
# Runs the given SQL string on the connection of the Genotype model and
# returns whatever that connection's execute call returns.
def execute(sql)
  connection = Genotype.connection
  connection.execute(sql)
end

View File

@@ -158,4 +158,40 @@ describe 'genotype parsing', sidekiq: :inline do
end
end
end
# Parsing of an IYG sample file: one SNP name and genotype per line.
context 'IYG' do
  let(:file) { File.open(Rails.root.join('test/data/iyg_sample.csv')) }
  let(:genotype) do
    create(:genotype, genotype: file, filetype: 'IYG')
  end

  it 'parses IYG data', truncate: true do
    # Snp
    snp_data = Snp.all.map do |s|
      [s.name, s.position, s.chromosome, s.genotype_frequency,
       s.allele_frequency, s.ranking, s.user_snps_count]
    end.sort_by { |s| s[0] }
    # Non-MT SNPs carry the placeholder position/chromosome '1'; MT SNPs are
    # renamed to their rs identifier and take the position from the name.
    expected = [
      ['rs2131925', '1', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
      ['rs2815752', '1', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
      ['rs10924081', '1', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
      ['rs199838004', '3027', 'MT', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
      ['rs41456348', '4336', 'MT', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1]
    ]
    expect(snp_data).to match_array(expected)

    # UserSnp
    user_snps = UserSnp.all
    user_snp_genotypes = user_snps.map(&:local_genotype)
    expected_genotypes = %w(GT AA AA T T)
    # The query has no ORDER BY, so compare without relying on row order.
    expect(user_snp_genotypes).to match_array(expected_genotypes)
    user_snps.each do |s|
      expect(s.genotype_id).to eq(genotype.id)
      expect(Snp.pluck(:name)).to include(s.snp_name)
    end
  end
end
end

5
test/data/iyg_sample.csv Normal file
View File

@@ -0,0 +1,5 @@
rs2131925 GT
rs2815752 AA
rs10924081 AA
MT-T3027C T
MT-T4336C T
1 rs2131925 GT
2 rs2815752 AA
3 rs10924081 AA
4 MT-T3027C T
5 MT-T4336C T