diff --git a/app/workers/parsing.rb b/app/workers/parsing.rb
index 8842080..97feab8 100644
--- a/app/workers/parsing.rb
+++ b/app/workers/parsing.rb
@@ -55,7 +55,7 @@ class Parsing
     rows = File.readlines(genotype.genotype.path)
                .reject { |line| line.start_with?('#') } # Skip comments
     stats[:rows_without_comments] = rows.length
-    csv = send(:"parse_#{genotype.filetype}", rows)
+    csv = send(:"parse_#{genotype.filetype.tr('-', '_')}", rows)
     known_chromosomes = ['MT', 'X', 'Y', (1..22).map(&:to_s)].flatten
     csv.select! do |row|
       # snp name
@@ -166,6 +166,25 @@ class Parsing
     end
   end
 
+  # Parses an FTDNA Illumina export (CSV with an
+  # RSID,CHROMOSOME,POSITION,RESULT header and double-quoted values).
+  #
+  # rows - Array of raw line Strings; the header line, if present, is
+  #        removed from it in place.
+  #
+  # Returns an Array with one
+  # [genotype.id, rsid, chromosome, position, result] Array per row.
+  def parse_ftdna_illumina(rows)
+    # An empty upload yields no rows; to_s keeps the header check nil-safe.
+    rows.shift if rows.first.to_s.start_with?('RSID')
+    rows.map do |row|
+      # values_at pads short or blank lines with nil instead of raising,
+      # so one malformed line cannot abort the whole import.
+      fields = row.strip.split(',').values_at(0, 1, 2, 3)
+      [genotype.id, *fields.map { |field| field.to_s.delete('"') }]
+    end
+  end
+
   def execute(sql)
     Genotype.connection.execute(sql)
   end
diff --git a/spec/integration/genotype_parsing_and_deleting_spec.rb b/spec/integration/genotype_parsing_and_deleting_spec.rb
index c3e07ec..66f5e08 100644
--- a/spec/integration/genotype_parsing_and_deleting_spec.rb
+++ b/spec/integration/genotype_parsing_and_deleting_spec.rb
@@ -123,4 +123,39 @@ describe 'genotype parsing', sidekiq: :inline do
       end
     end
   end
+
+  context 'ftdna-illumina' do
+    let(:file) { File.open(Rails.root.join('test/data/ftdna-illumina_sample.csv')) }
+    let(:genotype) do
+      create(:genotype, genotype: file, filetype: 'ftdna-illumina')
+    end
+
+    it 'parses ftdna-illumina data', truncate: true do
+      # Snp
+      snp_data = Snp.all.map do |s|
+        [s.name, s.position, s.chromosome, s.genotype_frequency,
+         s.allele_frequency, s.ranking, s.user_snps_count]
+      end.sort_by { |s| s[0] }
+
+      expected = [
+        ['rs3094315', '752566', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
+        ['rs3131972', '752721', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
+        ['rs12562034', '768448', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
+        ['rs12124819', '776546', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
+        ['rs11240777', '798959', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1]
+      ]
+
+      expect(snp_data).to match_array(expected)
+
+      # UserSnp
+      user_snps = UserSnp.all
+      user_snp_genotypes = user_snps.map(&:local_genotype)
+      expected_genotypes = %w(AA GG GG AA AG)
+      expect(user_snp_genotypes).to eq(expected_genotypes)
+      user_snps.each do |s|
+        expect(s.genotype_id).to eq(genotype.id)
+        expect(Snp.pluck(:name)).to include(s.snp_name)
+      end
+    end
+  end
 end
diff --git a/test/data/ftdna-illumina_sample.csv b/test/data/ftdna-illumina_sample.csv
new file mode 100644
index 0000000..357459b
--- /dev/null
+++ b/test/data/ftdna-illumina_sample.csv
@@ -0,0 +1,6 @@
+RSID,CHROMOSOME,POSITION,RESULT
+"rs3094315","1","752566","AA"
+"rs3131972","1","752721","GG"
+"rs12562034","1","768448","GG"
+"rs12124819","1","776546","AA"
+"rs11240777","1","798959","AG"