Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
xtpcpp
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
PAPPSO
xtpcpp
Commits
d49d4a75
Commit
d49d4a75
authored
7 years ago
by
Langella Olivier
Browse files
Options
Downloads
Patches
Plain Diff
WIP: Mascot parser
parent
3581ca04
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/input/mascot/mascotdatparser.cpp
+43
-21
43 additions, 21 deletions
src/input/mascot/mascotdatparser.cpp
src/input/mascot/mascotdatparser.h
+14
-1
14 additions, 1 deletion
src/input/mascot/mascotdatparser.h
with
57 additions
and
22 deletions
src/input/mascot/mascotdatparser.cpp
+
43
−
21
View file @
d49d4a75
...
...
@@ -147,6 +147,39 @@ void MascotDatParser::parseHeaderLine(const QString & header_line) {
}
}
void
MascotDatParser
::
saveAndClearPeptide
()
{
qDebug
()
<<
"MascotDatParser::saveAndClearPeptide begin"
;
if
(
_current_peptide
.
query_index
>
0
)
{
//parse and save
pappso
::
Peptide
peptide
(
_current_peptide
.
peptide_string_list
.
at
(
4
));
if
(
_current_peptide
.
protein_string_list
.
size
()
!=
_current_peptide
.
fasta_file_list
.
size
())
{
throw
pappso
::
PappsoException
(
QObject
::
tr
(
"ERROR (_current_peptide.protein_string_list.size() != _current_peptide.fasta_file_list.size()) %1"
).
arg
(
_current_peptide
.
protein_string_list
.
join
(
",
\"
"
)));
}
foreach
(
const
QString
&
str
,
_current_peptide
.
protein_string_list
)
{
//sp|O95006|OR2F2_HUMAN":0:299:303:1
int
position
=
str
.
indexOf
(
"
\"
"
,
0
);
QString
accession
=
str
.
mid
(
0
,
position
);
qDebug
()
<<
"accession="
<<
accession
;
QStringList
position_list
=
str
.
mid
(
position
+
2
).
split
(
":"
);
if
(
position_list
.
size
()
!=
4
)
{
throw
pappso
::
PappsoException
(
QObject
::
tr
(
"ERROR position_list.size() != 4 %1"
).
arg
(
str
));
}
unsigned
int
start
=
position_list
.
at
(
1
).
toUInt
();
unsigned
int
stop
=
position_list
.
at
(
2
).
toUInt
();
}
}
//new peptide query clear
_current_peptide
.
peptide_string_list
.
clear
();
_current_peptide
.
fasta_file_list
.
clear
();
_current_peptide
.
query_index
=
0
;
_current_peptide
.
subst
=
""
;
qDebug
()
<<
"MascotDatParser::saveAndClearPeptide end"
;
}
void
MascotDatParser
::
parsePeptidesLine
(
const
QString
&
peptide_line
)
{
try
{
if
(
_regexp_header_line
.
exactMatch
(
peptide_line
))
{
...
...
@@ -156,14 +189,18 @@ void MascotDatParser::parsePeptidesLine(const QString & peptide_line) {
QStringList
index_list
=
index
.
split
(
"_"
);
if
(
index_list
.
size
()
==
3
)
{
if
(
index_list
[
2
]
==
"db"
)
{
saveAndClearPeptide
();
//q1_p1_db=02
_peptides_fasta_file_list
.
clear
();
while
(
value
.
size
()
>
0
)
{
QString
fasta_str
=
value
.
mid
(
0
,
2
);
_peptide
s_
fasta_file_list
.
push_back
(
_fasta_file_list
.
at
(
fasta_str
.
toInt
()
-
1
));
_current
_peptide
.
fasta_file_list
.
push_back
(
_fasta_file_list
.
at
(
fasta_str
.
toInt
()
-
1
));
value
=
value
.
mid
(
2
);
}
}
//q856_p9_subst=1,X,W
else
if
(
index_list
[
2
]
==
"subst"
)
{
_current_peptide
.
subst
=
value
;
}
}
else
if
(
index_list
.
size
()
==
2
)
{
if
(
value
==
"-1"
)
{
...
...
@@ -172,37 +209,22 @@ void MascotDatParser::parsePeptidesLine(const QString & peptide_line) {
else
{
QString
query_index
=
index_list
[
0
];
_current_peptide
.
query_index
=
query_index
.
mid
(
1
).
toUInt
();
QString
peptide_index
=
index_list
[
1
];
_current_peptide
.
peptide_index
=
peptide_index
.
mid
(
1
).
toUInt
();
//q1_p1=0,597.302322,0.997884,2,GAWHK,9,0000000,7.97,0000012000000000000,0,0;"sp|O95006|OR2F2_HUMAN":0:299:303:1
int
position
=
value
.
indexOf
(
";
\"
"
,
0
);
QString
peptide_string
=
value
.
mid
(
0
,
position
);
qDebug
()
<<
"peptide_string="
<<
peptide_string
;
QStringList
peptide_string_list
=
peptide_string
.
split
(
","
);
pappso
::
Peptide
peptide
(
peptide_string_list
.
at
(
4
));
_current_peptide
.
peptide_string_list
=
peptide_string
.
split
(
","
);
QString
protein_string
=
value
.
mid
(
position
+
2
);
qDebug
()
<<
"protein_string="
<<
protein_string
;
//"sp|Q9Y2I7|FYV1_HUMAN":0:670:675:2,"tr|E9PDH4|E9PDH4_HUMAN":0:614:619:2
QStringList
protein_string_list
=
protein_string
.
split
(
",
\"
"
);
if
(
protein_string_list
.
size
()
!=
_peptides_fasta_file_list
.
size
())
{
throw
pappso
::
PappsoException
(
QObject
::
tr
(
"ERROR (protein_string_list.size() != _peptides_fasta_file_list.size()) %1"
).
arg
(
value
));
}
foreach
(
const
QString
&
str
,
protein_string_list
)
{
//sp|O95006|OR2F2_HUMAN":0:299:303:1
int
position
=
str
.
indexOf
(
"
\"
"
,
0
);
QString
accession
=
str
.
mid
(
0
,
position
);
qDebug
()
<<
"accession="
<<
accession
;
QStringList
position_list
=
str
.
mid
(
position
+
2
).
split
(
":"
);
if
(
position_list
.
size
()
!=
4
)
{
throw
pappso
::
PappsoException
(
QObject
::
tr
(
"ERROR position_list.size() != 4 %1"
).
arg
(
value
));
}
unsigned
int
start
=
position_list
.
at
(
1
).
toUInt
();
unsigned
int
stop
=
position_list
.
at
(
2
).
toUInt
();
}
_current_peptide
.
protein_string_list
=
protein_string
.
split
(
",
\"
"
);
}
...
...
This diff is collapsed.
Click to expand it.
src/input/mascot/mascotdatparser.h
+
14
−
1
View file @
d49d4a75
...
...
@@ -43,6 +43,16 @@ private:
void
parseProteinLine
(
const
QString
&
protein_line
);
void
parseHeaderLine
(
const
QString
&
protein_line
);
void
parsePeptidesLine
(
const
QString
&
peptide_line
);
void
saveAndClearPeptide
();
/**
 * @brief Buffer for the peptide match being assembled from the "qN_pM..."
 * lines of the peptides section.
 */
struct PeptideLine {
    /// N taken from the "qN" part of the key; 0 means no peptide is buffered
    unsigned int query_index{0};

    /// M taken from the "pM" part of the key
    unsigned int peptide_index{0};

    /// raw value of the "qN_pM_subst" line (e.g. "1,X,W"), empty if none
    QString subst;

    /// comma-separated fields found before the first ;" of the "qN_pM" value
    QStringList peptide_string_list;

    /// protein entries found after the first ;" of the "qN_pM" value,
    /// split on ," (each one looks like ACCESSION":0:299:303:1)
    QStringList protein_string_list;

    /// fasta files referenced by the two-digit codes of the "qN_pM_db" line
    std::vector<FastaFileSp> fasta_file_list;
};
private
:
Project
*
_p_project
;
IdentificationGroup
*
_p_identification_group
;
...
...
@@ -52,13 +62,16 @@ private:
ProteinXtp
_current_protein
;
std
::
vector
<
FastaFileSp
>
_fasta_file_list
;
std
::
vector
<
FastaFileSp
>
_peptides_fasta_file_list
;
QRegExp
_regexp_header_line
;
unsigned
int
_number_of_queries
=
0
;
unsigned
int
_number_of_residues
=
0
;
QString
_error_str
;
PeptideLine
_current_peptide
;
};
#endif // MASCOTDATPARSER_H
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment