Skip to content
Snippets Groups Projects
Commit a7751c03 authored by Alexis Mergez's avatar Alexis Mergez
Browse files

Update gaf2aln.py

parent a12ec03c
No related branches found
No related tags found
1 merge request!2Update PanGeTools.def
......@@ -167,6 +167,55 @@ links = {}
# Parsing the gfa
print(f"[gaf2aln::GFA Parser] Extracting nodes, paths and links ...")
def GFA_parser(gfa_lines, nodes = nodes):
_links, _nodes, _nodes_length, paths = {}, {}, {}, {}
for line in gfa_lines:
line_content = line[:-1].split("\t")
line_id = line_content[0]
# Segment line
if line_id == "S" :
_nodes_length[str(line_content[1])] = len(line_content[2])
# Link line
elif line_id == "L":
try :
_links[str(line_content[1])][str(line_content[3])] = {
"FROM": str(line_content[2]),
"TO": str(line_content[4])
}
except :
_links[str(line_content[1])] = {
str(line_content[3]) : {"FROM.ORIENT": str(line_content[2]), "TO.ORIENT": str(line_content[4])}
}
# Path line
elif line_id == "P":
_paths[str(line_content[1])] = {
"NODES": {
str(node_id[:-1]): str(node_id[-1])
for node_id in line_content[2].split(',')
},
"CIGAR": line_content[3]
}
return [_links, _nodes, _nodes_length, _paths]
# splits = np.quantile(range(len(gfa_lines)+1), q= np.array(args.threads+1)/args.threads, method='higher').tolist()
# res = []
# for i in range(1, len(splits)):
# res.append(executor.submit(GFA_parser, gfa_lines[splits[i-1]:splits[i]]))
# for out in res:
# results = out.result()
# for link_id, link_info in results[0].items():
# links[]
for line in gfa_lines:
line_content = line[:-1].split("\t")
line_id = line_content[0]
......@@ -201,27 +250,44 @@ for line in gfa_lines:
del gfa_lines
print(f"[gaf2aln::Graph alignment processing] Computing nodes positions in each paths...")
# Adding nodes positions relative to path
for path_name in paths.keys():
print(f"[gaf2aln::Graph processing] Running on {path_name} ...")
print(f"[gaf2aln::Graph position processing] Computing nodes positions in each paths...")
def get_node_pos(path_name, nodes = nodes, paths = paths, nodes_length = nodes_length):
print(f"[gaf2aln::Graph position processing] Running on {path_name} ...")
cur_pos = 0
out = {}
# Iterating over nodes in the path
for path_node in paths[path_name]["NODES"].keys():
# Instead of checking if the node is one interesting node, we try to add to the nodes dict
try :
nodes[path_node][path_name] = {
if path_node in aln_nodes :
out[path_node] = {
"START": cur_pos, # Start position of the node start in the currrent path
"END": cur_pos+nodes_length[path_node], # End position of the node end in the current path
"STRAND": paths[path_name]["NODES"][node_id] # Orientation of the node in the current path
"STRAND": paths[path_name]["NODES"][path_node] # Orientation of the node in the current path
}
cur_pos += nodes_length[path_node]+1
except :
else :
cur_pos += nodes_length[path_node]+1
print(f"[gaf2aln::Graph alignment processing] Computing nodes positions in each alignement...")
return out
res = {}
executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)
# Adding nodes positions relative to path
for path_name in paths.keys():
res[path_name] = executor.submit(get_node_pos, path_name)
executor.shutdown(wait=True)
for path_name, out in res.items():
results = out.result()
for path_node, node_pos in results.items():
nodes[path_node][path_name] = node_pos
del res
print(f"[gaf2aln::Alignment position processing] Computing nodes positions in each alignement...")
# Adding nodes positions relative to path
def get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length):
......@@ -271,14 +337,22 @@ def get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length
return res
# Storing alignement
aln_processing = {}
res = {}
executor = concurrent.futures.ThreadPoolExecutor(max_workers=args.threads)
for aln_name in aln_dict.keys():
print(f"[gaf2aln::Graph processing] Running on {aln_name} ...")
print(f"[gaf2aln::Alignment position processing] Running on {aln_name} ...")
_ = get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length)
res[aln_name] = executor.submit(get_aln_node_info, aln_name)
#res[aln_name] = get_aln_node_info(aln_name, aln_dict = aln_dict, nodes_length = nodes_length)
executor.shutdown(wait=True)
for node_id, res in _.items():
nodes[node_id][aln_name] = res
for aln_name, node_info in res.items():
results = node_info.result()
for node_id, info in results.items():
nodes[node_id][aln_name] = info
del res
# Calculating CIGAR for each nodes in each aln
print(f"[gaf2aln::CIGAR processing] Computing nodes cigar from alignement ...")
......@@ -306,7 +380,6 @@ ALNS = {}
for aln_name in aln_dict.keys():
for path_name in paths.keys():
ALNS[(path_name, aln_name)] = []
_ = []
for node_id, orient in aln_dict[aln_name]["PATH.MATCH"]:
......@@ -316,7 +389,10 @@ for aln_name in aln_dict.keys():
q_end = n_info[aln_name]["END"]
_CG = n_info[aln_name]["CIGAR"]
try :
print(node_id, path_name, q_start, q_end)
if path_name in list(n_info.keys()):
print("\tIn path")
if n_info[aln_name]["STRAND"] == n_info[path_name]["STRAND"] :
t_start = n_info[path_name]["START"]+n_info[aln_name]["S.OFF"]
t_end = n_info[path_name]["END"]+n_info[aln_name]["E.OFF"]
......@@ -324,6 +400,8 @@ for aln_name in aln_dict.keys():
t_end = n_info[path_name]["START"]+n_info[aln_name]["S.OFF"]
t_start = n_info[path_name]["END"]+n_info[aln_name]["E.OFF"]
print("\t", t_start, t_end)
# Non empty temporary list of aln and ending of the last block is the same as the start of the new node :
if len(_) and _[-1]["T.END"] == t_start and _[-1]["Q.END"] == q_start:
tmp_aln["Q.END"] = q_end
......@@ -343,17 +421,18 @@ for aln_name in aln_dict.keys():
"T.END": t_end,
"CG": _CG,
}
except:
print("\t", tmp_aln)
else :
print("\tNot in path")
# Node is not in the path
tmp_aln = {
"Q.START": q_start,
"Q.END": q_end,
"T.START": -1,
"T.END": -1,
"CG": f"{nodes_length[node_id]}D"
}
print(ALNS)
_.append(tmp_aln)
ALNS[(path_name, aln_name)] = _
## Debug
for elem in ALNS[("TO1000#1#chr03", "ALN_1")]:
print(elem)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment