From 32f76c182ebe988155a812bc0feb981c35f4d0fc Mon Sep 17 00:00:00 2001
From: Philipp Bayer
Date: Mon, 10 Jun 2013 18:04:52 +1000
Subject: [PATCH] Re-wrote annotation dump to use CSV-library, and create a ZIP-file of several annotation-files

---
Review notes (placed below the "---" marker, so not part of the commit message):
 * The PGP loop read the SNP from the undefined local `sp`; it now uses the
   block variable `spg`.
 * The Genome.gov row referenced the undefined local `first_author`; it now
   uses `author`, which is the local that is actually assigned.
 * The old archive is only deleted when it exists, so the first run does not
   fail with Errno::ENOENT.
 * Papers without an associated SNP are skipped again (as before this patch)
   instead of aborting the whole dump with NoMethodError on nil.
 * Indentation was reconstructed; hunk sizes and the diffstat reflect the
   five added guard lines. The blob ids on the "index" line are unchanged
   from the original commit.

 lib/tasks/dump_annotation.rake | 176 +++++++++++++++++----------------
 serverscript                   |   2 +-
 2 files changed, 90 insertions(+), 88 deletions(-)

diff --git a/lib/tasks/dump_annotation.rake b/lib/tasks/dump_annotation.rake
index 1b15bb3..5804b3c 100644
--- a/lib/tasks/dump_annotation.rake
+++ b/lib/tasks/dump_annotation.rake
@@ -1,106 +1,108 @@
+require 'csv'
 namespace :snps do
   desc "Iterates over all SNPs, writes a CSV of annotation into public/"
   task :dump => :environment do
-    f = File.new("#{Rails.root}/public/annotation.csv", "w")
+    readme = File.new("#{Rails.root}/tmp/readme.txt", "w")
     # get date
-    f.write("File created at: #{Time.now}\n")
-    # dump header
+    readme.write("File created at: #{Time.now}\n")
+    readme.close()
     # dump mendeley
-    f.write("Mendeley\n")
-    f.write("SNP\tPosition\tChromosome\tYear\tFirst Author\tTitle\tDOI\tOpen Access\tLink\n")
-    MendeleyPaper.find_each do |m|
-      parental = m.snp
-      if parental == nil
-        puts m.snp_id
-        next
-      end
-      position = parental.position
-      name = parental.name
-      chrom = parental.chromosome
-      first_author = m.first_author
-      year = m.pub_year
-      title = m.title
-      doi = m.doi
-      oa = m.open_access
-      link = m.mendeley_url
-      f.write("#{name}\t#{position}\t#{chrom}\t#{year}\t#{first_author}\t#{title}\t#{doi}\t#{oa}\t#{link}\n")
-    end
+    CSV.open("#{Rails.root}/tmp/mendeley.csv", "wb") do |csv|
+      csv << ["Name", "Position", "Chromosome", "Year", "First author", "Title", "DOI", "Open Access status", "Link"]
+      MendeleyPaper.find_each do |m|
+        parental = m.snp
+        next unless parental
+        position = parental.position
+        name = parental.name
+        chrom = parental.chromosome
+        first_author = m.first_author
+        year = m.pub_year
+        title = m.title
+        doi = m.doi
+        oa = m.open_access
+        link = m.mendeley_url
+        csv << [name, position, chrom, year, first_author, title, doi, oa, link]
+      end
+    end
     # dump snpedia
-    f.write("Snpedia\n")
-    f.write("SNP\tPosition\tChromosome\tSummary\tLink\n")
-    SnpediaPaper.find_each do |sn|
-      parental = sn.snp
-      if parental == nil
-        puts m.snp_id
-        next
+    CSV.open("#{Rails.root}/tmp/snpedia.csv", "wb") do |csv|
+      csv << ["Name", "Position", "Chromosome", "Summary", "Link"]
+      SnpediaPaper.find_each do |sn|
+        parental = sn.snp
+        next unless parental
+        position = parental.position
+        name = parental.name
+        chrom = parental.chromosome
+        summary = sn.summary
+        link = sn.url
+        csv << [name, position, chrom, summary, link]
       end
-      position = parental.position
-      name = parental.name
-      chrom = parental.chromosome
-      summary = sn.summary
-      link = sn.url
-      f.write("#{name}\t#{position}\t#{chrom}\t#{summary}\t#{link}\n")
     end
     # dump plos
-    f.write("PLOS\n")
-    f.write("SNP\tPosition\tChromosome\tFirst author\tTitle\tDOI\tYear\n")
-    PlosPaper.find_each do |sp|
-      parental = sp.snp
-      if parental == nil
-        puts m.snp_id
-        next
+    CSV.open("#{Rails.root}/tmp/plos.csv","wb") do |csv|
+      csv << ["Name", "Position", "Chromosome", "Year", "First author", "Title", "DOI"]
+      PlosPaper.find_each do |sp|
+        parental = sp.snp
+        next unless parental
+        position = parental.position
+        name = parental.name
+        chrom = parental.chromosome
+        first_author = sp.first_author
+        title = sp.title
+        doi = sp.doi
+        year = sp.pub_date
+        csv << [name, position, chrom, year, first_author, title, doi]
       end
-      position = parental.position
-      name = parental.name
-      chrom = parental.chromosome
-      first_author = sp.first_author
-      title = sp.title
-      doi = sp.doi
-      year = sp.pub_date
-      f.write("#{name}\t#{position}\t#{chrom}\t#{year}\t#{first_author}\t#{title}\t#{doi}\n")
     end
     # dump pgp
-    f.write("PGP\n")
-    #nteger, gene: text, qualified_impact: text, inheritance: text, summary: text, trait: text,
-    f.write("SNP\tPosition\tChromosome\tGene\tQualified Impact\tInheritance\tSummary\tTrait\n")
-    PgpAnnotation.find_each do |spg|
-      parental = sp.snp
-      if parental == nil
-        puts m.snp_id
-        next
+    CSV.open("#{Rails.root}/tmp/pgp.csv", "wb") do |csv|
+      csv << ["Name", "Position", "Chromosome", "Gene", "Qualified Impact", "Inheritance", "Summary", "Trait"]
+      PgpAnnotation.find_each do |spg|
+        parental = spg.snp
+        next unless parental
+        position = parental.position
+        name = parental.name
+        chrom = parental.chromosome
+        gene = spg.gene
+        impact = spg.qualified_impact
+        inheritance = spg.inheritance
+        summ = spg.summary
+        trait = spg.trait
+        csv << [name, position, chrom, gene, impact, inheritance, summ, trait]
       end
-      position = parental.position
-      name = parental.name
-      chrom = parental.chromosome
-      gene = spg.gene
-      impact = spg.qualified_impact
-      inheritance = spg.inheritance
-      summ = spg.summary
-      trait = spg.trait
-      f.write("#{name}\t#{position}\t#{chrom}\t#{gene}\t#{impact}\t#{inheritance}\t#{summ}\t#{trait}\n")
     end
     # dump genome_gov
-    f.write("Genome.gov\n")
-    f.write("SNP\tPosition\tChromosome\tFirst author\tTitle\tPubmed-link\tYear\tJournal\tTrait\tp-value\tp-value description\tConfidence Interval\n")
-    GenomeGovPaper.find_each do |gg|
-      parental = gg.snp
-      if parental == nil
-        puts m.snp_id
-        next
+    CSV.open("#{Rails.root}/tmp/genome_gov.csv", "wb") do |csv|
+      csv << ["Name", "Position", "Chromosome", "First Author", "Title", "Pubmed link", "Year", "Journal", "Trait", "p-value", "p-value description", "Confidence Interval"]
+      GenomeGovPaper.find_each do |gg|
+        parental = gg.snp
+        next unless parental
+        position = parental.position
+        name = parental.name
+        chrom = parental.chromosome
+        author = gg.first_author
+        title = gg.title
+        pubmed = gg.pubmed_link
+        journal = gg.journal
+        year = gg.pub_date
+        trait = gg.trait
+        pvalue = gg.pvalue
+        pvalue_description = gg.pvalue_description
+        conf = gg.confidence_interval
+        csv << [name, position, chrom, author, title, pubmed, year, journal, trait, pvalue, pvalue_description, conf]
       end
-      position = parental.position
-      name = parental.name
-      chrom = parental.chromosome
-      author = gg.first_author
-      title = gg.title
-      pubmed = gg.pubmed_link
-      journal = gg.journal
-      year = gg.pub_date
-      trait = gg.trait
-      pvalue = gg.pvalue
-      pvalue_description = gg.pvalue_description
-      conf = gg.confidence_interval
-      f.write("#{name}\t#{position}\t#{chrom}\t#{first_author}\t#{title}\t#{pubmed}\t#{year}\t#{journal}\t#{trait}\t#{pvalue}\t#{pvalue_description}\t#{conf}\n")
     end
+
+    # now zip the CSVs and put the zip into /public
+    File.delete("#{Rails.root}/public/annotation.zip") if File.exist?("#{Rails.root}/public/annotation.zip")
+    Zip::ZipFile.open("#{Rails.root}/public/annotation.zip", Zip::ZipFile::CREATE) do |zipfile|
+      zipfile.add("genome_gov.csv", "#{Rails.root}/tmp/genome_gov.csv")
+      zipfile.add("readme.txt", "#{Rails.root}/tmp/readme.txt")
+      zipfile.add("pgp.csv", "#{Rails.root}/tmp/pgp.csv")
+      zipfile.add("mendeley.csv", "#{Rails.root}/tmp/mendeley.csv")
+      zipfile.add("plos.csv", "#{Rails.root}/tmp/plos.csv")
+      zipfile.add("snpedia.csv", "#{Rails.root}/tmp/snpedia.csv")
+    end
+    # delete the CSVs?
   end
 end
diff --git a/serverscript b/serverscript
index d6dcb38..9862bf5 100644
--- a/serverscript
+++ b/serverscript
@@ -3,4 +3,4 @@
 screen -d -m -S "Server" bundle exec rails s
 screen -d -m -S "Solr" bundle exec rake sunspot:solr:run
 screen -d -m -S "Redis" redis-server
-screen -d -m -S "Sidekiq" bundle exec sidekiq -q preparse,2 -q parse,2 -q deletegenotype -q fitbit -q fixphenotypes -q frequency -q genomegov -q mailnewgenotype -q mendeley_details -q mendeley -q pgp -q plos_details -q plos -q zipfulldata -q snpedia -q zipgenotyping
+screen -d -m -S "Sidekiq" bundle exec sidekiq -q preparse,2 -q parse,2 -q deletegenotype -q fitbit -q fixphenotypes -q frequency -q genomegov -q mailnewgenotype -q mendeley_details -q mendeley -q pgp -q plos_details -q plos -q zipfulldata -q snpedia -q zipgenotyping -C config/sidekiq.yml -e development