Blitvak Week 15
From LMU BioDB 2015
12/8
7121 - orf
6993 - uniprot
128 - diff
205 - query for duplicates
77 - number of records
128 + 77 = 205
# Run xmlpipedb-match with the ID pattern as its regex argument, feeding the uniprot-taxonomy XML file on stdin.
# Bracket expressions list their characters with no separators: a comma or space written inside [...]
# is itself an accepted character, so the classes are spelled [LSM], [Aa] and [A-Za-z].
# NOTE(review): counts recorded in these notes were presumably produced with the older pattern — re-check if they matter.
java -jar xmlpipedb-match-1.1.1.jar "p?BCA[LSM]?[0-9][0-9][0-9][Aa]?[0-9]?[A-Za-z]?" < "uniprot-taxonomy%3A216591_GEN_BL12_20151119.xml"
-- For each genetype_name_hjid, count the rows of type 'ORF' whose value contains the ID pattern.
-- `~` is an unanchored, case-sensitive regex match, so the pattern may match anywhere inside value.
-- The final class is [A-Za-z]: a comma inside a bracket expression is a literal comma, not a separator.
-- (That class is trailing and optional, so dropping the comma does not change which rows match.)
select
    genetype_name_hjid,
    count(value)
from genenametype
where type = 'ORF'
  and value ~ 'p?BCA[LMS]?[0-9][0-9][0-9][Aa]?[0-9]?[A-Za-z]?'
group by genetype_name_hjid;
-- List the genetype_name_hjid values that hold more than one 'ORF' row matching the ID pattern,
-- together with how many such rows each one holds.
-- `~` is an unanchored, case-sensitive regex match; the final class is [A-Za-z] because a comma
-- inside a bracket expression would be a literal comma, not a separator.
select
    genetype_name_hjid,
    count(value)
from genenametype
where type = 'ORF'
  and value ~ 'p?BCA[LMS]?[0-9][0-9][0-9][Aa]?[0-9]?[A-Za-z]?'
group by genetype_name_hjid
having count(value) > 1;
-- Total number of matching 'ORF' rows that sit under a genetype_name_hjid holding more than one of them.
-- The inner query produces one row per such genetype_name_hjid with its row count (dupe_count);
-- the outer query adds those counts up.
-- coalesce(): sum() over zero groups yields NULL, so report 0 when there are no duplicates.
-- The inner query carries no ORDER BY because row order cannot affect the sum.
-- The output column keeps the name "sum".
select coalesce(sum(dupe_count), 0) as "sum"
from (
    select
        genetype_name_hjid,
        count(value) as dupe_count
    from genenametype
    where type = 'ORF'
      -- [A-Za-z]: a comma inside a bracket expression would be a literal comma, not a separator.
      and value ~ 'p?BCA[LMS]?[0-9][0-9][0-9][Aa]?[0-9]?[A-Za-z]?'
    group by genetype_name_hjid
    having count(value) > 1
) as dupe_tally;
-- List the genetype_name_hjid values holding more than one 'ORF' row that matches the ID pattern,
-- largest count first. genetype_name_hjid is a secondary sort key so ties come back in a stable order.
-- `~` is an unanchored, case-sensitive regex match; the final class is [A-Za-z] because a comma
-- inside a bracket expression would be a literal comma, not a separator.
select
    genetype_name_hjid,
    count(value) as dupe_count
from genenametype
where type = 'ORF'
  and value ~ 'p?BCA[LMS]?[0-9][0-9][0-9][Aa]?[0-9]?[A-Za-z]?'
group by genetype_name_hjid
having count(value) > 1
order by
    dupe_count desc,
    genetype_name_hjid;
-- Ad-hoc inspection: show every column of the rows stored under genetype_name_hjid 66138.
select gnt.*
from genenametype as gnt
where gnt.genetype_name_hjid = 66138;
-- List the genetype_name_hjid values holding more than one 'ORF' row that matches the ID pattern,
-- largest count first. genetype_name_hjid is a secondary sort key so ties come back in a stable order.
-- `~` is an unanchored, case-sensitive regex match; the final class is [A-Za-z] because a comma
-- inside a bracket expression would be a literal comma, not a separator.
select
    genetype_name_hjid,
    count(value)
from genenametype
where type = 'ORF'
  and value ~ 'p?BCA[LMS]?[0-9][0-9][0-9][Aa]?[0-9]?[A-Za-z]?'
group by genetype_name_hjid
having count(value) > 1
order by
    count(value) desc,
    genetype_name_hjid;
For Presentation
Final command for MATCH:
# Run xmlpipedb-match with the ID pattern as its regex argument, feeding the uniprot-taxonomy XML file on stdin.
# Bracket expressions list their characters with no separators: a comma or space written inside [...]
# is itself an accepted character, so the classes are spelled [LSM], [Aa] and [A-Za-z].
# NOTE(review): counts recorded in these notes were presumably produced with the older pattern — re-check if they matter.
java -jar xmlpipedb-match-1.1.1.jar "p?BCA[LSM]?[0-9][0-9][0-9][Aa]?[0-9]?[A-Za-z]?" < "uniprot-taxonomy%3A216591_GEN_BL12_20151119.xml"
-- Count all rows of type 'ORF' whose value contains the ID pattern.
-- `~` is an unanchored, case-sensitive regex match, so the pattern may match anywhere inside value.
-- Bracket expressions are written [LMS], [Aa] and [A-Za-z], as in the other queries in this file:
-- a comma or space inside [...] is a literal character, so "[L,S,M]?" would also accept a comma
-- between "BCA" and the digits.
-- The statement starts directly with "select"; a leading "- " list marker is not valid SQL.
select count(*)
from genenametype
where type = 'ORF'
  and value ~ 'p?BCA[LMS]?[0-9][0-9][0-9][Aa]?[0-9]?[A-Za-z]?';