From 854efb8498c4f4c640adeaff4da6854deca0eefc Mon Sep 17 00:00:00 2001 From: valot <valot@b8ef2a07-7df7-436f-90b9-41648038564b> Date: Wed, 15 Feb 2012 09:34:32 +0000 Subject: [PATCH] Separated contaminants database filter on separated class git-svn-id: https://subversion.renater.fr/xtandempipeline/trunk@214 b8ef2a07-7df7-436f-90b9-41648038564b --- .../class_msms/Identification.java | 3 +- .../FilterGroupFromFastaDatabase.java | 87 +++++++++++++++++++ .../xtandempipeline/grouping/GroupSet.java | 68 +-------------- 3 files changed, 91 insertions(+), 67 deletions(-) create mode 100644 xtandempipeline/src/fr/inra/pappso/xtandempipeline/filter_print/FilterGroupFromFastaDatabase.java diff --git a/xtandempipeline/src/fr/inra/pappso/xtandempipeline/class_msms/Identification.java b/xtandempipeline/src/fr/inra/pappso/xtandempipeline/class_msms/Identification.java index c76790a4f..71840ae57 100644 --- a/xtandempipeline/src/fr/inra/pappso/xtandempipeline/class_msms/Identification.java +++ b/xtandempipeline/src/fr/inra/pappso/xtandempipeline/class_msms/Identification.java @@ -6,6 +6,7 @@ import java.util.Collections; import org.apache.log4j.Logger; import fr.inra.pappso.xtandempipeline.MsException.MSMSException; +import fr.inra.pappso.xtandempipeline.filter_print.FilterGroupFromFastaDatabase; import fr.inra.pappso.xtandempipeline.filter_print.filter_base; import fr.inra.pappso.xtandempipeline.filter_print.print_base; import fr.inra.pappso.xtandempipeline.filter_print.swt_table_base; @@ -118,7 +119,7 @@ public class Identification { public void remove_contaminant_submatch(File database) { //TODO Remove group containing prot from this database - grouping.removeGroupFromDatabase(database); + new FilterGroupFromFastaDatabase(database).filter(grouping); } public void setProteinListValideToGroupeSet() { diff --git a/xtandempipeline/src/fr/inra/pappso/xtandempipeline/filter_print/FilterGroupFromFastaDatabase.java b/xtandempipeline/src/fr/inra/pappso/xtandempipeline/filter_print/FilterGroupFromFastaDatabase.java new file mode 100644 index 000000000..67ea419cb --- /dev/null +++ b/xtandempipeline/src/fr/inra/pappso/xtandempipeline/filter_print/FilterGroupFromFastaDatabase.java @@ -0,0 +1,87 @@ +package fr.inra.pappso.xtandempipeline.filter_print; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStreamReader; +import java.util.HashSet; + +import org.apache.log4j.Logger; + +import fr.inra.pappso.xtandempipeline.grouping.Group; +import fr.inra.pappso.xtandempipeline.grouping.GroupSet; +import fr.inra.pappso.xtandempipeline.grouping.HashSampleScanSetProt; +import fr.inra.pappso.xtandempipeline.grouping.SubGroup; + +public class FilterGroupFromFastaDatabase { + + private static final Logger logger = Logger.getLogger(FilterGroupFromFastaDatabase.class); + + private File database; + + public FilterGroupFromFastaDatabase(File database) { + logger.info("Remove Group from Fasta Database "+ database.getName()); + this.database=database; + } + + public void filter (GroupSet grouping) { + //Read fasta file to get accession list + if(database.exists()){ + logger.info("Read fasta file : "+database.getName()); + HashSet<String> access = new HashSet<String>(); + try{ + FileInputStream is = new FileInputStream(database); + InputStreamReader isr = new InputStreamReader(is); + BufferedReader br = new BufferedReader(isr); + + String line; + while((line= br.readLine())!=null){ + line.trim(); + if(line.length()>0){ + if(line.startsWith(">")){ + access.add(line.split(" ")[0].replaceFirst(">", "")); + } + } + } + }catch (Exception e) { + logger.error(e); + } + //Search contaminants proteins + logger.info("Remove Group in new mode"); + for (Group group : grouping.getGroupList()) { + boolean isconta = false; + for (SubGroup sg : group.getSubGroupSet()) { + for (HashSampleScanSetProt hashProt : sg + .getHashSampleScanSetProtSet()) { + if (access.contains(hashProt.getMatch().get_protein_match().get_cle_accession())) { + isconta = true; + } + } + } + if (isconta) + grouping.getHashSetOfGroups().remove(group); + } + }else{ + logger.error("fasta file doesnt not exist: " + database.getAbsolutePath()); + logger.info("Remove Group from Database in old mode"); + for (Group group : grouping.getGroupList()) { + boolean isconta = false; + for (SubGroup sg : group.getSubGroupSet()) { + for (HashSampleScanSetProt hashProt : sg + .getHashSampleScanSetProtSet()) { + if (hashProt.getMatch().get_protein_match().getDatabase().getDatabasePath() + .contains(database.getName())) { + isconta = true; + } + } + } + if (isconta) + grouping.getHashSetOfGroups().remove(group); + } + } + + //Numbering after contaminant removing + grouping.numbering(); + } + +} diff --git a/xtandempipeline/src/fr/inra/pappso/xtandempipeline/grouping/GroupSet.java b/xtandempipeline/src/fr/inra/pappso/xtandempipeline/grouping/GroupSet.java index 6e1d7d64a..a0dfa5d5b 100644 --- a/xtandempipeline/src/fr/inra/pappso/xtandempipeline/grouping/GroupSet.java +++ b/xtandempipeline/src/fr/inra/pappso/xtandempipeline/grouping/GroupSet.java @@ -1,9 +1,5 @@ package fr.inra.pappso.xtandempipeline.grouping; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; @@ -31,7 +27,7 @@ public class GroupSet { this.numbering(); } - private void numbering() { + public void numbering() { // numerotation ArrayList<Group> orderedGroupList = new ArrayList<Group>(groupList); @@ -86,67 +82,7 @@ public class GroupSet { return orderedGroupList; } - public void removeGroupFromDatabase(File database) { - //Read fasta file to get accession list - if(database.exists()){ - logger.info("Read fasta file : "+database.getName()); - HashSet<String> access = new HashSet<String>(); - try{ - FileInputStream is = new FileInputStream(database); - InputStreamReader isr = new InputStreamReader(is); - BufferedReader br = new BufferedReader(isr); - - String line; - while((line= br.readLine())!=null){ - line.trim(); - if(line.length()>0){ - if(line.startsWith(">")){ - access.add(line.split(" ")[0].replaceFirst(">", "")); - } - } - } - }catch (Exception e) { - logger.error(e); - } - //Search contaminants proteins - logger.info("Remove Group from Database in new mode: " + database.getName()); - for (Group group : this.getGroupList()) { - boolean isconta = false; - for (SubGroup sg : group.getSubGroupSet()) { - for (HashSampleScanSetProt hashProt : sg - .getHashSampleScanSetProtSet()) { - if (access.contains(hashProt.getMatch().get_protein_match().get_cle_accession())) { - isconta = true; - } - } - } - if (isconta) - this.groupList.remove(group); - } - }else{ - logger.error("fasta file doesnt not exist: " + database.getAbsolutePath()); - logger.info("Remove Group from Database in old mode: " + database.getName()); - for (Group group : this.getGroupList()) { - boolean isconta = false; - for (SubGroup sg : group.getSubGroupSet()) { - for (HashSampleScanSetProt hashProt : sg - .getHashSampleScanSetProtSet()) { - if (hashProt.getMatch().get_protein_match().getDatabase().getDatabasePath() - .contains(database.getName())) { - isconta = true; - } - } - } - if (isconta) - this.groupList.remove(group); - } - } - - //Numbering after contaminant removing - this.numbering(); - } - - protected HashSet<Group> getHashSetOfGroups() { + public HashSet<Group> getHashSetOfGroups() { return this.groupList; } -- GitLab