Skip to content
Snippets Groups Projects
Commit 8703f784 authored by Sebastien Carrere's avatar Sebastien Carrere
Browse files

add generic parser

parent ee14fe00
No related branches found
No related tags found
No related merge requests found
......@@ -27,6 +27,14 @@ Convert [Nicknames](https://forgemia.inra.fr/lipme/nicknames-pipeline) TSV file
- Option: Filter on a specific dataset or gene id
### ParseGenericTsvFile
Convert a generic TSV file:
- Mandatory columns: `query`, `target`
- To attach metadata to the query and/or target node, name the column header `query:metadataX` or `target:metadataY`
- All other columns are treated as edge metadata
- Option: Filter on a specific id (query or target)
## Installation
```bash
......
import fs from "fs";
import { basename } from "path";
import { v4 as uuidv4 } from 'uuid';
import { getFileContent, Graph, GraphLink, GraphNode, randomIntFromInterval } from "./common";
// Names of the two mandatory TSV columns.
// NOTE(review): not referenced anywhere else in the visible part of this module.
const fields: Array<string> = ["query", "target"];
/**
 * Build a {@link Graph} from a generic tab-separated edge list.
 *
 * The first line is the header; every `#` character is removed from it before
 * it is split on tabs. It must contain a `query` and a `target` column. Columns
 * named `query:<field>` / `target:<field>` are collected as metadata of the
 * query / target node (distinct values only, in order of first appearance).
 * Every column without one of those two prefixes - which includes `query` and
 * `target` themselves - is copied into the edge's `metadata.data`.
 *
 * Lines whose number of tab-separated cells differs from the header's are
 * skipped silently (this also discards blank lines).
 *
 * @param infile Path of the TSV file; its content is read through `getFileContent`.
 * @param id     Optional filter: keep only records whose query or target equals this id.
 * @param xmax   Upper bound for the random x position of a node (800 when omitted or 0).
 * @param ymax   Upper bound for the random y position of a node (600 when omitted or 0).
 * @returns The graph, with one node per distinct query/target id and one edge per kept record.
 * @throws Error when `infile` does not exist or the header lacks `query` or `target`.
 */
export const ParseGenericTsvFile = (infile: string, id?: string, xmax?: number, ymax?: number): Graph => {
    if (!fs.existsSync(infile)) {
        throw new Error(`[ERROR]\tFile not found: ${infile}`);
    }

    const lines: string[] = getFileContent(infile).split(/[\n\r]/);

    // Column name -> position. When a name is repeated, the last occurrence wins.
    const header: string[] = lines[0].replace(/#/g, '').split('\t');
    const columnIndex = new Map<string, number>();
    header.forEach((name: string, index: number) => {
        columnIndex.set(name, index);
    });

    const queryIndex = columnIndex.get('query');
    const targetIndex = columnIndex.get('target');
    if (queryIndex === undefined || targetIndex === undefined) {
        throw new Error(`[ERROR]\tInvalid file format: ${infile} - Should contain 'query' and 'target' columns`);
    }

    // Classify the columns once instead of re-filtering the header for every record.
    const queryColumns: Array<[string, number]> = [];
    const targetColumns: Array<[string, number]> = [];
    const edgeColumns: Array<[string, number]> = [];
    for (const [name, index] of columnIndex) {
        if (name.startsWith('query:')) {
            queryColumns.push([name.replace('query:', ''), index]);
        } else if (name.startsWith('target:')) {
            targetColumns.push([name.replace('target:', ''), index]);
        } else {
            edgeColumns.push([name, index]);
        }
    }

    // Map/Set accumulators: ids or field names such as "constructor" or "toString"
    // must not be mistaken for properties inherited from Object.prototype.
    const nodeInfo = new Map<string, Map<string, Set<string>>>();
    const collect = (nodeId: string, columns: Array<[string, number]>, record: string[]): void => {
        let info = nodeInfo.get(nodeId);
        if (!info) {
            info = new Map<string, Set<string>>();
            nodeInfo.set(nodeId, info);
        }
        for (const [field, index] of columns) {
            let values = info.get(field);
            if (!values) {
                values = new Set<string>();
                info.set(field, values);
            }
            values.add(record[index]);
        }
    };

    const links: GraphLink[] = [];
    let recordCount = 0;
    for (const line of lines.slice(1)) {
        const record = line.split('\t');
        // Compare against the header width, not the number of distinct column
        // names, so a repeated header name does not reject every record.
        if (record.length !== header.length) {
            continue;
        }
        const queryId = record[queryIndex];
        const targetId = record[targetIndex];
        if (id && queryId !== id && targetId !== id) {
            continue;
        }
        recordCount += 1;

        collect(queryId, queryColumns, record);
        collect(targetId, targetColumns, record);

        const linkInfo: { [key: string]: string } = {};
        for (const [name, index] of edgeColumns) {
            linkInfo[name] = record[index];
        }
        links.push({
            id: uuidv4(),
            label: 'link',
            source: queryId,
            target: targetId,
            relation: 'link',
            metadata: {
                data: linkInfo
            }
        });
    }

    const nodes: { [key: string]: GraphNode } = {};
    for (const [nodeId, info] of nodeInfo) {
        const data: { [key: string]: string[] } = {};
        for (const [field, values] of info) {
            data[field] = [...values];
        }
        nodes[nodeId] = {
            id: nodeId,
            label: nodeId,
            metadata: {
                position: {
                    x: randomIntFromInterval(1, xmax ? xmax : 800),
                    y: randomIntFromInterval(1, ymax ? ymax : 600),
                },
                type: 'node',
                data: data
            }
        };
    }

    console.log(`[INFO]\tParsed ${recordCount} records from ${infile}`);
    console.log(`[INFO]\tGenerated ${Object.keys(nodes).length} nodes and ${links.length} edges`);

    const graph: Graph = {
        id: uuidv4(),
        label: basename(infile),
        nodes: nodes,
        edges: links,
        metadata: {
        }
    };
    return graph;
}
\ No newline at end of file
// Package entry point: exposes every parser together with the shared graph types.
import { ParseOboFile } from "./obo";
import { ParseOrthoFinderTsvFile } from "./orthofinder";
import { ParseNicknamesTsvFile } from "./nicknames";
import { ParseGenericTsvFile } from "./generic";
// One export list only: exporting the same name twice from a module is a duplicate-export error.
export { ParseOboFile, ParseOrthoFinderTsvFile, ParseNicknamesTsvFile, ParseGenericTsvFile };
export type { Graph, GraphLink, GraphNode } from './common'
import { ParseGenericTsvFile } from '../src';
import fs from 'fs';
// Parses the tests/generic.tsv.gz fixture without an id filter, dumps the
// resulting graph as JSON, then checks the edge and node counts.
test('Parse Gziped Generic File correctly', () => {
    const graph = ParseGenericTsvFile('tests/generic.tsv.gz');
    const serialized = JSON.stringify(graph, null, 2);
    fs.writeFileSync('tests/generic.json', serialized);

    const edgeCount = graph.edges.length;
    const nodeCount = Object.keys(graph.nodes).length;
    expect(edgeCount).toBe(500);
    expect(nodeCount).toBe(484);
});
// Parses the same fixture restricted to a single id, dumps the resulting graph
// as JSON, then checks that exactly one edge between two nodes remains.
test('Parse Gziped Generic File correctly with id filter', () => {
    const wantedId = 'SpeciesA_Chr12g028070';
    const graph = ParseGenericTsvFile('tests/generic.tsv.gz', wantedId);
    const serialized = JSON.stringify(graph, null, 2);
    fs.writeFileSync('tests/generic.id.json', serialized);

    const edgeCount = graph.edges.length;
    const nodeCount = Object.keys(graph.nodes).length;
    expect(edgeCount).toBe(1);
    expect(nodeCount).toBe(2);
});
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment