diff --git a/app/workers/parsing.rb b/app/workers/parsing.rb
index 97feab8..9c8e125 100644
--- a/app/workers/parsing.rb
+++ b/app/workers/parsing.rb
@@ -55,7 +55,7 @@ class Parsing
     rows = File.readlines(genotype.genotype.path)
                .reject { |line| line.start_with?('#') } # Skip comments
     stats[:rows_without_comments] = rows.length
-    csv = send(:"parse_#{genotype.filetype.sub('-', '_')}", rows)
+    csv = send(:"parse_#{genotype.filetype.sub('-', '_').downcase}", rows)
     known_chromosomes = ['MT', 'X', 'Y', (1..22).map(&:to_s)].flatten
     csv.select! do |row|
       # snp name
@@ -180,6 +180,55 @@ class Parsing
     end
   end
 
+  # Turns the rows of an IYG genotype file into SNP tuples.
+  #
+  # Each row is expected to be "<snp name>\t<genotype>". Chromosome and
+  # position are derived from the SNP name alone:
+  # * names starting with 'MT' (e.g. "MT-T3027C") get chromosome 'MT' and
+  #   the first run of digits in the name as position; names listed in
+  #   db_snp_names are replaced by their rs identifier,
+  # * every other SNP gets '1' for both chromosome and position.
+  #
+  # Rows without a tab-separated genotype (blank or truncated lines) are
+  # skipped instead of failing with NoMethodError on nil.
+  #
+  # Returns one [genotype id, snp name, chromosome, position, genotype]
+  # array per usable row.
+  def parse_iyg(rows)
+    db_snp_names = {
+      'MT-T3027C' => 'rs199838004', 'MT-T4336C' => 'rs41456348',
+      'MT-G4580A' => 'rs28357975', 'MT-T5004C' => 'rs41419549',
+      'MT-C5178a' => 'rs28357984', 'MT-A5390G' => 'rs41333444',
+      'MT-C6371T' => 'rs41366755', 'MT-G8697A' => 'rs28358886',
+      'MT-G9477A' => 'rs2853825', 'MT-G10310A' => 'rs41467651',
+      'MT-A10550G' => 'rs28358280', 'MT-C10873T' => 'rs2857284',
+      'MT-C11332T' => 'rs55714831', 'MT-A11947G' => 'rs28359168',
+      'MT-A12308G' => 'rs2853498', 'MT-A12612G' => 'rs28359172',
+      'MT-T14318C' => 'rs28357675', 'MT-T14766C' => 'rs3135031',
+      'MT-T14783C' => 'rs28357680'
+    }
+    parsed = rows.map do |row|
+      snp_name, local_genotype = row.split("\t")
+      # No name or no genotype column: nothing usable on this line.
+      next if snp_name.nil? || local_genotype.nil?
+
+      if snp_name.start_with?('MT')
+        position = snp_name[/[0-9]+/]
+        chromosome = 'MT'
+      else
+        position = chromosome = '1'
+      end
+      [
+        genotype.id,
+        db_snp_names.fetch(snp_name, snp_name),
+        chromosome,
+        position,
+        local_genotype.strip
+      ]
+    end
+    parsed.compact # drop the nils left by skipped rows
+  end
+
   def execute(sql)
     Genotype.connection.execute(sql)
   end
diff --git a/spec/integration/genotype_parsing_and_deleting_spec.rb b/spec/integration/genotype_parsing_and_deleting_spec.rb
index 66f5e08..6b024c1 100644
--- a/spec/integration/genotype_parsing_and_deleting_spec.rb
+++ b/spec/integration/genotype_parsing_and_deleting_spec.rb
@@ -158,4 +158,47 @@ describe 'genotype parsing', sidekiq: :inline do
       end
     end
   end
+
+  context 'IYG' do
+    let(:file) { File.open(Rails.root.join('test/data/iyg_sample.csv')) }
+    let(:genotype) do
+      create(:genotype, genotype: file, filetype: 'IYG')
+    end
+
+    it 'parses IYG data', truncate: true do
+      # NOTE(review): genotype is a lazy let and is first referenced at the
+      # end of this example; presumably an outer hook creates it before the
+      # queries below run - confirm.
+      # Snp
+      snp_data = Snp.all.map do |s|
+        [s.name, s.position, s.chromosome, s.genotype_frequency,
+         s.allele_frequency, s.ranking, s.user_snps_count]
+      end.sort_by { |s| s[0] }
+
+      expected = [
+        ['rs2131925', '1', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
+        ['rs2815752', '1', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
+        ['rs10924081', '1', '1', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
+        ['rs199838004', '3027', 'MT', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1],
+        ['rs41456348', '4336', 'MT', {}, { 'A' => 0, 'T' => 0, 'G' => 0, 'C' => 0 }, 0, 1]
+      ]
+
+      expect(snp_data).to match_array(expected)
+
+      # UserSnp
+      # UserSnp.all is queried without an explicit order, so compare
+      # name/genotype pairs with match_array instead of relying on row order.
+      user_snps = UserSnp.all
+      user_snp_genotypes = user_snps.map { |s| [s.snp_name, s.local_genotype] }
+      expected_genotypes = [
+        %w(rs2131925 GT), %w(rs2815752 AA), %w(rs10924081 AA),
+        %w(rs199838004 T), %w(rs41456348 T)
+      ]
+      expect(user_snp_genotypes).to match_array(expected_genotypes)
+      user_snps.each do |s|
+        expect(s.genotype_id).to eq(genotype.id)
+        expect(Snp.pluck(:name)).to include(s.snp_name)
+      end
+    end
+  end
 end
diff --git a/test/data/iyg_sample.csv b/test/data/iyg_sample.csv
new file mode 100644
index 0000000..f542b5c
--- /dev/null
+++ b/test/data/iyg_sample.csv
@@ -0,0 +1,5 @@
+rs2131925	GT
+rs2815752	AA
+rs10924081	AA
+MT-T3027C	T
+MT-T4336C	T