Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
Extended Microorganisms Taxonomy
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Omnicrobe
Extended Microorganisms Taxonomy
Commits
58460964
Commit
58460964
authored
4 years ago
by
Robert Bossy
Browse files
Options
Downloads
Patches
Plain Diff
try to fix everything (defunct method)
parent
5600712d
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
bacdive2alvisnlp.xslt
+7
-0
7 additions, 0 deletions
bacdive2alvisnlp.xslt
dsmz-match.plan
+258
-28
258 additions, 28 deletions
dsmz-match.plan
with
265 additions
and
28 deletions
bacdive2alvisnlp.xslt
+
7
−
0
View file @
58460964
...
...
@@ -15,7 +15,14 @@
</xsl:template>
<!--
  Strain taxonomy entry (taxonomy_name/strains/list-item).
  Declares one a:section per taxonomic field of the entry. Each section is named
  after the field and takes its contents from the child of the same name; the
  full scientific name is whitespace-normalized first.
  NOTE(review): a:section is presumably the section-creating extension element
  of the reader that runs this stylesheet; confirm against its documentation.
-->
<xsl:template match="taxonomy_name/strains/list-item">
  <a:section name="subspecies_epithet" xpath-contents="subspecies_epithet"/>
  <a:section name="species" xpath-contents="species"/>
  <a:section name="genus" xpath-contents="genus"/>
  <a:section name="family" xpath-contents="family"/>
  <a:section name="ordo" xpath-contents="ordo"/>
  <a:section name="class" xpath-contents="class"/>
  <a:section name="phylum" xpath-contents="phylum"/>
  <a:section name="domain" xpath-contents="domain"/>
  <a:section name="full_scientific_name" xpath-contents="normalize-space(full_scientific_name)"/>
  <a:section name="designation" xpath-contents="designation"/>
</xsl:template>
...
...
This diff is collapsed.
Click to expand it.
dsmz-match.plan
+
258
−
28
View file @
58460964
...
...
@@ -31,21 +31,153 @@
<createSections/>
</species-and-number>
<match class="FileMapper">
<mark-strains class="Action">
  <!-- Flags the catalog-number and species-and-number sections of every document
       with strain="yes".
       NOTE(review): setFeatures presumably grants this Action permission to
       write features; confirm against the Action module documentation. -->
  <target>documents.(sections:catalog-number | sections:species-and-number)</target>
  <action>set:feat:strain("yes")</action>
  <setFeatures/>
</mark-strains>
<mark-candidates class="Action">
<!-- Flags with candidate="yes" every section that carries the strain feature,
     plus the sections named species, genus, family, ordo, class, phylum and
     domain.
     NOTE(review): setFeatures presumably grants this Action permission to
     write features; confirm against the Action module documentation. -->
<target>documents.(sections[@strain] | sections:species | sections:genus | sections:family | sections:ordo | sections:class | sections:phylum | sections:domain)</target>
<action>set:feat:candidate("yes")</action>
<setFeatures/>
</mark-candidates>
<match class="FileMapper">
<!-- Looks up each section flagged as candidate, using the section contents as
     the lookup form, and fills the features taxid, canonical-name, path, pos
     and rank on the section.
     NOTE(review): the leading comma in targetFeatures presumably leaves the
     first mapped column (the key) without a feature; confirm. The mapping
     source is not set in the lines visible here. -->
<target>documents.sections[@candidate]</target>
<form>contents</form>
<targetFeatures>,taxid,canonical-name,path,pos,rank</targetFeatures>
</match>
<!-- dispatch: gives each document one dispatch value, a taxid and the rule that
     fired. Every rule before no-match only targets documents without a dispatch
     feature, so (assuming the steps run in the order written) a document is
     claimed by the first rule whose condition holds; no-match takes the rest.
     Each rule stores module:id in the rule feature, presumably the identifier
     of the running step (TODO confirm). -->
<dispatch>
<equivalent class="Action">
<!-- A strain section was matched at rank "no rank": dispatch "equivalent",
     taxid copied from the first such section (the {0} subscript). -->
<target>documents[(not @dispatch) and sections[@strain and @rank == "no rank"]]</target>
<action>
set:feat:dispatch("equivalent")
| set:feat:taxid(sections[@strain and @rank == "no rank"]{0}.@taxid)
| set:feat:rule(module:id)
</action>
<setFeatures/>
</equivalent>
<separate-subspecies-type-material class="Action">
<!-- A strain section was matched at rank "subspecies": dispatch "new", taxid
     copied from the first such section. -->
<target>documents[(not @dispatch) and sections[@strain and @rank == "subspecies"]]</target>
<action>
set:feat:dispatch("new")
| set:feat:taxid(sections[@strain and @rank == "subspecies"]{0}.@taxid)
| set:feat:rule(module:id)
</action>
<setFeatures/>
</separate-subspecies-type-material>
<separate-species-type-material class="Action">
<!-- A strain section was matched at rank "species": dispatch "new", taxid
     copied from the first such section. -->
<target>documents[(not @dispatch) and sections[@strain and @rank == "species"]]</target>
<action>
set:feat:dispatch("new")
| set:feat:taxid(sections[@strain and @rank == "species"]{0}.@taxid)
| set:feat:rule(module:id)
</action>
<setFeatures/>
</separate-species-type-material>
<new-strain-for-subspecies class="Action">
<!-- The species section was matched at rank "subspecies": dispatch "new",
     taxid copied from that section. -->
<target>documents[(not @dispatch) and sections:species[@rank == "subspecies"]]</target>
<action>
set:feat:dispatch("new")
| set:feat:taxid(sections:species[@rank == "subspecies"].@taxid)
| set:feat:rule(module:id)
</action>
<setFeatures/>
</new-strain-for-subspecies>
<new-strain-for-species class="Action">
<!-- The species section was matched at rank "species": dispatch "new", taxid
     copied from that section. -->
<target>documents[(not @dispatch) and sections:species[@rank == "species"]]</target>
<action>
set:feat:dispatch("new")
| set:feat:taxid(sections:species[@rank == "species"].@taxid)
| set:feat:rule(module:id)
</action>
<setFeatures/>
</new-strain-for-species>
<new-strain-for-genus class="Action">
<!-- From here on the species itself was not matched, so the dispatch value is
     "new-species". Genus section matched at rank "genus": taxid copied from
     that section. -->
<target>documents[(not @dispatch) and sections:genus[@rank == "genus"]]</target>
<action>
set:feat:dispatch("new-species")
| set:feat:taxid(sections:genus[@rank == "genus"].@taxid)
| set:feat:rule(module:id)
</action>
<setFeatures/>
</new-strain-for-genus>
<new-strain-for-family class="Action">
<!-- Family section matched at rank "family": dispatch "new-species", taxid
     copied from that section. -->
<target>documents[(not @dispatch) and sections:family[@rank == "family"]]</target>
<action>
set:feat:dispatch("new-species")
| set:feat:taxid(sections:family[@rank == "family"].@taxid)
| set:feat:rule(module:id)
</action>
<setFeatures/>
</new-strain-for-family>
<new-strain-for-ordo class="Action">
<!-- Section "ordo" matched at rank "order" (the section name and the rank
     label differ): dispatch "new-species", taxid copied from that section. -->
<target>documents[(not @dispatch) and sections:ordo[@rank == "order"]]</target>
<action>
set:feat:dispatch("new-species")
| set:feat:taxid(sections:ordo[@rank == "order"].@taxid)
| set:feat:rule(module:id)
</action>
<setFeatures/>
</new-strain-for-ordo>
<new-strain-for-class class="Action">
<!-- Class section matched at rank "class": dispatch "new-species", taxid
     copied from that section. -->
<target>documents[(not @dispatch) and sections:class[@rank == "class"]]</target>
<action>
set:feat:dispatch("new-species")
| set:feat:taxid(sections:class[@rank == "class"].@taxid)
| set:feat:rule(module:id)
</action>
<setFeatures/>
</new-strain-for-class>
<new-strain-for-phylum class="Action">
<!-- Phylum section matched at rank "phylum": dispatch "new-species", taxid
     copied from that section. -->
<target>documents[(not @dispatch) and sections:phylum[@rank == "phylum"]]</target>
<action>
set:feat:dispatch("new-species")
| set:feat:taxid(sections:phylum[@rank == "phylum"].@taxid)
| set:feat:rule(module:id)
</action>
<setFeatures/>
</new-strain-for-phylum>
<new-strain-for-domain class="Action">
<!-- Section "domain" matched at rank "superkingdom" (the section name and the
     rank label differ): dispatch "new-species", taxid copied from that
     section. -->
<target>documents[(not @dispatch) and sections:domain[@rank == "superkingdom"]]</target>
<action>
set:feat:dispatch("new-species")
| set:feat:taxid(sections:domain[@rank == "superkingdom"].@taxid)
| set:feat:rule(module:id)
</action>
<setFeatures/>
</new-strain-for-domain>
<no-match class="Action">
<!-- Fallback for every document still without a dispatch value: dispatch
     "no-match" and an empty taxid. -->
<target>documents[not @dispatch]</target>
<action>
set:feat:dispatch("no-match")
| set:feat:taxid("")
| set:feat:rule(module:id)
</action>
<setFeatures/>
</no-match>
</dispatch>
<export>
<name-match class="TabularExport">
<full-report class="TabularExport">
<outDir>.</outDir>
<corpusFile>all.txt</corpusFile>
<lines>documents.sections[@taxid]</lines>
<corpusFile>full-report.txt</corpusFile>
<lines>documents.sections[@candidate]</lines>
<columns>
document.@id,
document.sections:species.contents,
document.sections:full_scientific_name.contents,
@name,
contents,
@taxid,
...
...
@@ -53,47 +185,145 @@
@rank
</columns>
<headers>
"ID",
"SPECIES NAME",
"FULL NAME",
"BACDIVE ID",
"FIELD",
"NAME",
"NCBI TAXID",
"NCBI CANONICAL",
"NCBI RANK"
</headers>
</name-match>
</full-report>
<dispatch-report class="TabularExport">
<!-- Writes dispatch-report.txt into the directory "." with one line per
     document: its id, dispatch value, taxid and the rule that set them. The
     headers element lists one title per column, in the same order. -->
<outDir>.</outDir>
<corpusFile>dispatch-report.txt</corpusFile>
<lines>documents</lines>
<columns>
@id,
@dispatch,
@taxid,
@rule
</columns>
<headers>
"BACDIVE ID",
"DISPATCH",
"NCBI TAXID",
"RULE"
</headers>
</dispatch-report>
<equivalent-strains class="TabularExport">
<outDir>.</outDir>
<corpusFile>equivalent-strains.txt</corpusFile>
<lines>documents[sections[@rank == "no rank"]].sections:catalog-number[contents ?= " "]</lines> <!-- exclude ym20-087 kondo51 etc -->
<lines>documents</lines>
<columns separator=";">
str:replace(str:lower(contents), " ",
"
:
")
;
document.sections[@rank == "no rank"]{0}.@taxid
(if @dispatch == "equivalent" then @taxid ^ "\t" else
"")
^
str:join:'\t'(sections:catalog-number[contents ?= " "], str:replace(str:lower(contents), " ", ":")) <!-- exclude ym20-087 kondo51 etc -->
</columns>
</equivalent-strains>
<new-strains>
<identifier class="Action">
<target>documents[sections[@rank == "species" or @rank == "subspecies"]]</target>
<action>set:feat:new-taxid(str:replace(str:lower(sections:catalog-number[contents ^= "DSM"].contents), " ", ":"))</action>
<!-- canonical: for documents dispatched as "new" or "new-species", picks one
     catalog-number section as the canonical one, derives the new taxon
     identifier of the document from it, and flags the matching
     species-and-number section as the scientific name. The steps depend on
     each other and are presumably run in the order written. -->
<canonical>
<first-dsm class="Action">
<!-- Marks as canonical the first catalog-number section whose contents satisfy
     ^= "DSM" (presumably a starts-with test; confirm). -->
<target>documents[@dispatch == "new" or @dispatch == "new-species"].sections:catalog-number[contents ^= "DSM"]{0}</target>
<action>set:feat:canonical("yes")</action>
<setFeatures/>
</first-dsm>
<first-any class="Action">
<!-- Fallback: when no catalog-number section of the document is canonical yet,
     marks the first catalog-number section as canonical. -->
<target>documents[(@dispatch == "new" or @dispatch == "new-species") and not sections:catalog-number[@canonical]].sections:catalog-number{0}</target>
<action>set:feat:canonical("yes")</action>
<setFeatures/>
</first-any>
<new-taxid class="Action">
<!-- Stores on the document the feature new-taxid: the canonical catalog number
     in lower case with every space replaced by ":". -->
<target>documents[@dispatch == "new" or @dispatch == "new-species"].sections:catalog-number[@canonical]</target>
<action>document.set:feat:new-taxid(str:replace(str:lower(target.contents), " ", ":"))</action>
<setFeatures/>
</new-taxid>
<scientific-name class="Action">
<!-- Flags with scientific-name="yes" the first species-and-number section of
     the document whose contents satisfy ?= against the canonical catalog
     number (presumably a contains test; confirm). -->
<target>documents[@dispatch == "new" or @dispatch == "new-species"].sections:catalog-number[@canonical]</target>
<action>document.sections:species-and-number[contents ?= target.contents]{0}.set:feat:scientific-name("yes")</action>
<setFeatures/>
</scientific-name>
</canonical>
<new-species-id class="Action">
<target>documents[@dispatch == "new-species"]</target>
<action>set:feat:new-species-id("prov:" ^ str:replace(sections:species[contents != "unclassified"].contents, " ", "-"))</action>
<setFeatures/>
</identifier>
</new-species-id>
<export-nodes class="TabularExport">
<!-- Writes dsmz-nodes.dmp into the directory ".": one line per document
     dispatched as "new" or "new-species", columns separated by " | "
     (trim="false" on the separator).
     Columns: the new taxon id; its parent, which is the matched taxid for
     "new" documents and the provisional new-species-id otherwise; the rank
     "no rank"; then ten constant values.
     NOTE(review): the thirteen columns presumably follow the field order of
     the NCBI taxonomy nodes.dmp file; confirm the meaning of the constants
     against the NCBI taxdump readme. -->
<outDir>.</outDir>
<corpusFile>dsmz-nodes.dmp</corpusFile>
<lines>documents[@dispatch == "new" or @dispatch == "new-species"]</lines>
<separator trim="false"> | </separator>
<columns>
@new-taxid,
if @dispatch == "new" then @taxid else @new-species-id,
"no rank",
"",
"0",
"1",
"11",
"1",
"0",
"1",
"1",
"0",
""
</columns>
</export-nodes>
<export-species-nodes class="TabularExport">
<!-- Writes dsmz-species-nodes.dmp into the directory ".": lines come from the
     documents dispatched as "new-species", columns separated by " | "
     (trim="false" on the separator).
     NOTE(review): sort:nsval keyed on new-species-id ^ "___" ^ taxid
     presumably sorts the documents and keeps one per distinct key; confirm.
     Columns: the provisional species id; its parent, the matched taxid; the
     rank, "species" when the subspecies_epithet section is empty and
     "subspecies" otherwise; then ten constant values, presumably in NCBI
     nodes.dmp field order (TODO confirm). -->
<outDir>.</outDir>
<corpusFile>dsmz-species-nodes.dmp</corpusFile>
<lines>sort:nsval(documents[@dispatch == "new-species"], @new-species-id ^ "___" ^ @taxid)</lines>
<separator trim="false"> | </separator>
<columns>
@new-species-id,
@taxid,
if sections:subspecies_epithet.contents == "" then "species" else "subspecies",
"",
"0",
"1",
"11",
"1",
"0",
"1",
"1",
"0",
""
</columns>
</export-species-nodes>
<export-names class="TabularExport">
<!-- Writes dsmz-names.dmp into the directory ".": one line per section carrying
     the strain feature, for documents dispatched as "new" or "new-species",
     columns separated by " | " (trim="false" on the separator).
     Columns: the new taxon id of the document; the section contents as the
     name; an empty column; and the name class, "scientific name" for the
     section flagged scientific-name and "equivalent catalog" for all others. -->
<outDir>.</outDir>
<corpusFile>dsmz-names.dmp</corpusFile>
<lines>documents[@dispatch == "new" or @dispatch == "new-species"].sections[@strain]</lines>
<separator trim="false"> | </separator>
<columns>
document.@new-taxid,
contents,
"",
if @scientific-name then "scientific name" else "equivalent catalog"
</columns>
</export-names>
<export class="TabularExport">
<export-species-names class="TabularExport">
<outDir>.</outDir>
<corpusFile>taxa+id_dsmz.txt</corpusFile>
<lines>documents[@new-taxid].(sections:catalog-number | sections:species-and-number)</lines>
<columns separator=";">
contents;
document.@new-taxid;
document.sections:species-and-number[contents ?= "DSM"].contents;
document.sections[@taxid]{0}.@path ^ "/" ^ document.@new-taxid;
document.sections[@taxid]{0}.@pos;
"no rank"
<corpusFile>dsmz-species-names.dmp</corpusFile>
<lines>documents[@dispatch == "new-species"].sections:species</lines>
<separator trim="false"> | </separator>
<columns>
document.@new-species-id,
contents,
"",
"scientific name"
</columns>
</export>
</export-species-names>
</new-strains>
</export>
</alvisnlp-plan>
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment