mirror of
https://github.com/chenasraf/snpr.git
synced 2026-05-18 01:39:01 +00:00
Add IYG parsing
This commit is contained in:
@@ -55,7 +55,7 @@ class Parsing
|
||||
rows = File.readlines(genotype.genotype.path)
|
||||
.reject { |line| line.start_with?('#') } # Skip comments
|
||||
stats[:rows_without_comments] = rows.length
|
||||
csv = send(:"parse_#{genotype.filetype.sub('-', '_')}", rows)
|
||||
csv = send(:"parse_#{genotype.filetype.sub('-', '_').downcase}", rows)
|
||||
known_chromosomes = ['MT', 'X', 'Y', (1..22).map(&:to_s)].flatten
|
||||
csv.select! do |row|
|
||||
# snp name
|
||||
@@ -180,6 +180,37 @@ class Parsing
|
||||
end
|
||||
end
|
||||
|
||||
# Parses the rows of an IYG genotype file into SNP tuples.
#
# Each row is expected to look like "<snp name>\t<genotype>". Names that
# start with "MT" carry their position inside the name ("MT-T3027C" => "3027")
# and are replaced by an rs identifier when one is listed in the lookup table
# below. Every other SNP gets '1' for both chromosome and position.
#
# Rows that are blank or have no genotype column are skipped.
#
# @param rows [Array<String>] lines of the genotype file
# @return [Array<Array>] one
#   [genotype id, snp name, chromosome, position, local genotype] entry per
#   usable row
def parse_iyg(rows)
  # IYG mitochondrial marker name => rs identifier.
  # NOTE(review): "MT-C5178a" ends in a lowercase "a", unlike every other
  # key — confirm this matches the spelling used in real IYG files.
  db_snp_names = {
    "MT-T3027C" => "rs199838004", "MT-T4336C" => "rs41456348",
    "MT-G4580A" => "rs28357975", "MT-T5004C" => "rs41419549",
    "MT-C5178a" => "rs28357984", "MT-A5390G" => "rs41333444",
    "MT-C6371T" => "rs41366755", "MT-G8697A" => "rs28358886",
    "MT-G9477A" => "rs2853825", "MT-G10310A" => "rs41467651",
    "MT-A10550G" => "rs28358280", "MT-C10873T" => "rs2857284",
    "MT-C11332T" => "rs55714831", "MT-A11947G" => "rs28359168",
    "MT-A12308G" => "rs2853498", "MT-A12612G" => "rs28359172",
    "MT-T14318C" => "rs28357675", "MT-T14766C" => "rs3135031",
    "MT-T14783C" => "rs28357680"
  }

  parsed = rows.map do |row|
    snp_name, local_genotype = row.split("\t")
    # A blank line or a row without a tab yields no genotype (or no name at
    # all); skip it instead of failing on nil further down.
    next if snp_name.nil? || local_genotype.nil?

    if snp_name.start_with?('MT')
      # The first run of digits in the marker name is its position.
      position = snp_name[/[0-9]+/]
      chromosome = 'MT'
    else
      position = chromosome = '1'
    end

    [
      genotype.id,
      # Unknown markers keep the name they came with.
      db_snp_names.fetch(snp_name, snp_name),
      chromosome,
      position,
      local_genotype.strip
    ]
  end

  # Drop the nil placeholders left by skipped rows.
  parsed.compact
end
|
||||
|
||||
# Runs the given SQL through the Genotype model's connection and returns
# whatever that call returns.
#
# @param sql [String] SQL statement, passed to the connection unchanged
def execute(sql)
  Genotype.connection.execute(sql)
end
|
||||
|
||||
@@ -158,4 +158,40 @@ describe 'genotype parsing', sidekiq: :inline do
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Checks the Snp and UserSnp rows produced from the IYG sample file.
context 'IYG' do
  let(:file) { File.open(Rails.root.join('test/data/iyg_sample.csv')) }
  let(:genotype) do
    create(:genotype, genotype: file, filetype: 'IYG')
  end

  it 'parses IYG data', truncate: true do
    # Snp
    snp_data = Snp.all.map do |s|
      [s.name, s.position, s.chromosome, s.genotype_frequency,
       s.allele_frequency, s.ranking, s.user_snps_count]
    end

    expected = [
      ['rs2131925', '1', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
      ['rs2815752', '1', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
      ['rs10924081', '1', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
      ['rs199838004', '3027', 'MT', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
      ['rs41456348', '4336', 'MT', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1]
    ]

    # match_array ignores order, so no sorting is needed.
    expect(snp_data).to match_array(expected)

    # UserSnp
    user_snps = UserSnp.all
    # Pair each genotype with its SNP name: the query has no explicit order,
    # so comparing a bare list of genotypes position by position is fragile.
    user_snp_genotypes = user_snps.map { |s| [s.snp_name, s.local_genotype] }
    expected_genotypes = [
      %w(rs2131925 GT),
      %w(rs2815752 AA),
      %w(rs10924081 AA),
      %w(rs199838004 T),
      %w(rs41456348 T)
    ]
    expect(user_snp_genotypes).to match_array(expected_genotypes)

    # Query the SNP names once rather than once per UserSnp.
    snp_names = Snp.pluck(:name)
    user_snps.each do |s|
      expect(s.genotype_id).to eq(genotype.id)
      expect(snp_names).to include(s.snp_name)
    end
  end
end
|
||||
|
||||
end
|
||||
|
||||
5
test/data/iyg_sample.csv
Normal file
5
test/data/iyg_sample.csv
Normal file
@@ -0,0 +1,5 @@
|
||||
rs2131925 GT
|
||||
rs2815752 AA
|
||||
rs10924081 AA
|
||||
MT-T3027C T
|
||||
MT-T4336C T
|
||||
|
Reference in New Issue
Block a user