Add ftdna parsing

This commit is contained in:
Helge Rausch
2014-08-30 13:42:51 +02:00
parent ed55e1d900
commit 0e143e1fec
3 changed files with 56 additions and 1 deletions

View File

@@ -55,7 +55,7 @@ class Parsing
rows = File.readlines(genotype.genotype.path)
.reject { |line| line.start_with?('#') } # Skip comments
stats[:rows_without_comments] = rows.length
csv = send(:"parse_#{genotype.filetype}", rows)
csv = send(:"parse_#{genotype.filetype.sub('-', '_')}", rows)
known_chromosomes = ['MT', 'X', 'Y', (1..22).map(&:to_s)].flatten
csv.select! do |row|
# snp name
@@ -166,6 +166,20 @@ class Parsing
end
end
# Parses the rows of an FTDNA (Illumina) raw-data CSV export.
#
# Each data row carries the columns RSID, CHROMOSOME, POSITION, RESULT with
# every value wrapped in double quotes, e.g. "rs3094315","1","752566","AA".
#
# @param rows [Array<String>] lines of the file; a leading header line
#   starting with 'RSID' is removed from this array in place
# @return [Array<Array>] one entry per remaining row, in the form
#   [genotype.id, rsid, chromosome, position, result] with quotes stripped
def parse_ftdna_illumina(rows)
  # Nothing to parse; without this guard `rows.first` is nil and the header
  # check below would raise NoMethodError.
  return [] if rows.empty?

  # Drop the column header line if the file has one.
  rows.shift if rows.first.start_with?('RSID')
  rows.map do |row|
    fields = row.strip.split(',')
    # Only the first four columns are used; surrounding quotes are removed.
    [genotype.id] + (0..3).map { |index| fields[index].delete('"') }
  end
end
# Runs the given SQL statement on the connection exposed by Genotype and
# returns whatever that connection's #execute returns.
def execute(sql)
  connection = Genotype.connection
  connection.execute(sql)
end

View File

@@ -123,4 +123,39 @@ describe 'genotype parsing', sidekiq: :inline do
end
end
end
# Parsing of an FTDNA (Illumina) raw-data export, using the five-row sample
# fixture test/data/ftdna-illumina_sample.csv.
context 'ftdna-illumina' do
  let(:file) { File.open(Rails.root.join('test/data/ftdna-illumina_sample.csv')) }
  let(:genotype) do
    create(:genotype, genotype: file, filetype: 'ftdna-illumina')
  end

  it 'parses ftdna-illumina data', truncate: true do
    # Snp
    snp_data = Snp.all.map do |s|
      [s.name, s.position, s.chromosome, s.genotype_frequency,
       s.allele_frequency, s.ranking, s.user_snps_count]
    end.sort_by { |s| s[0] }
    expected = [
      ['rs3094315', '752566', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
      ['rs3131972', '752721', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
      ['rs12562034', '768448', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
      ['rs12124819', '776546', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
      ['rs11240777', '798959', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1]
    ]
    expect(snp_data).to match_array(expected)

    # UserSnp: compare (snp name, genotype) pairs so the assertion does not
    # depend on the row order of an unordered query.
    user_snps = UserSnp.all
    user_snp_genotypes = user_snps.map { |s| [s.snp_name, s.local_genotype] }
    expected_genotypes = [
      %w(rs3094315 AA),
      %w(rs3131972 GG),
      %w(rs12562034 GG),
      %w(rs12124819 AA),
      %w(rs11240777 AG)
    ]
    expect(user_snp_genotypes).to match_array(expected_genotypes)

    # Fetch the Snp names once instead of once per UserSnp.
    snp_names = Snp.pluck(:name)
    user_snps.each do |s|
      expect(s.genotype_id).to eq(genotype.id)
      expect(snp_names).to include(s.snp_name)
    end
  end
end
end

View File

@@ -0,0 +1,6 @@
RSID,CHROMOSOME,POSITION,RESULT
"rs3094315","1","752566","AA"
"rs3131972","1","752721","GG"
"rs12562034","1","768448","GG"
"rs12124819","1","776546","AA"
"rs11240777","1","798959","AG"
1 RSID CHROMOSOME POSITION RESULT
2 rs3094315 1 752566 AA
3 rs3131972 1 752721 GG
4 rs12562034 1 768448 GG
5 rs12124819 1 776546 AA
6 rs11240777 1 798959 AG