#!/usr/bin/perl -w
#
# Parse the GO process ontology flat file and write three text files:
#
#   id_description_go_process_hy.txt  "<GO id>\t<description>", one line per id
#   go_process_node_hy.txt            "<index>\t<GO id>", index counted from 0
#                                     over the string-sorted ids
#   go_process_graph_hy.txt           "<parent>-><child>", one line per edge,
#                                     string-sorted
#
# Input handling, as implemented below:
#   * Lines starting with "!" are header lines and are skipped.
#   * The number of leading whitespace characters is the depth of the line's
#     term in the tree; the term one level up is its parent.
#   * After the leading marker ("$", "<" or "%") the line is split on those
#     same characters into segments. Each segment is a ";"-separated list of
#     fields; a field starting with "GO:<digits>" is an id field, and the
#     field just before it is taken as its description.
#   * An id field may hold several comma-separated ids; each one gets the
#     description and takes part in every edge built from that field.
#   * Every id field after the first one on a line names an additional parent
#     of the first id on that line.

use strict;
use warnings;

# Earlier runs used dated snapshots of the same file, e.g.
# "process.ontology.2002-06-01" and "process.ontology.2003-12-12.txt".
my $ontology_file    = "process.ontology";
my $description_file = "id_description_go_process_hy.txt";
my $node_file        = "go_process_node_hy.txt";
my $graph_file       = "go_process_graph_hy.txt";

my %description_of;    # single GO id      => description text
my %graph;             # "parent->child"   => 1
my @node;              # indentation level => id field last seen at that level

open(my $po, '<', $ontology_file)
    or die "Cannot open $ontology_file for reading: $!\n";

while (my $line = <$po>) {
    chomp $line;
    next if $line =~ /^!/;    # header line

    # The indentation width must be measured before it is stripped.
    my ($indent) = $line =~ /^(\s*)/;
    my $level = length $indent;

    # Drop the indentation and the leading relationship marker.
    $line =~ s/^\s*(?:\$|<|%)//;

    my @ids;                  # id fields found on this line, in order
    for my $segment (split /<|%|\$/, $line) {
        $segment =~ s/^\s*//;
        my @terms = split /\s*;\s*/, $segment;

        for my $i (0 .. $#terms) {
            next unless $terms[$i] =~ /^GO:[0-9]+/;

            # The description is the field preceding the id. When the id is
            # the first field there is none; index -1 would silently wrap
            # around to the last field, so use an empty description instead.
            my $description = $i > 0 ? $terms[ $i - 1 ] : '';

            my $id = $terms[$i];
            $id =~ s/\s+//g;    # remove all spaces
            push @ids, $id;

            # An id field can hold several ids; record each one separately.
            for my $single_id (split /\s*,\s*/, $id) {
                $description_of{$single_id} = $description;
            }
        }
    }

    # Remember this line's first id field as the current node at its level.
    # A line without any id leaves the level undefined on purpose, so that
    # deeper lines are not attached to a stale node.
    $node[$level] = $ids[0];

    # Tree edge: the node one level up is the parent of this node.
    # Both ends must exist (indentation may jump, or a line may have no id).
    if (   $level > 0
        && defined $node[$level]
        && defined $node[ $level - 1 ])
    {
        add_edges(\%graph, $node[ $level - 1 ], $node[$level]);
    }

    # Every id field after the first one on the line is an additional parent
    # of the first id.
    for my $i (1 .. $#ids) {
        add_edges(\%graph, $ids[$i], $ids[0]);
    }
}
close($po) or die "Cannot close $ontology_file: $!\n";

# Print node descriptions and the node index.
open(my $td, '>', $description_file)
    or die "Cannot open $description_file for writing: $!\n";
open(my $pn, '>', $node_file)
    or die "Cannot open $node_file for writing: $!\n";

my $count = 0;
for my $id (sort keys %description_of) {
    print {$td} "$id\t$description_of{$id}\n";
    print {$pn} "$count\t$id\n";
    $count++;
}

# Buffered write errors only surface at close, so check it.
close($td) or die "Cannot close $description_file: $!\n";
close($pn) or die "Cannot close $node_file: $!\n";

# Print graph (edges).
open(my $gr, '>', $graph_file)
    or die "Cannot open $graph_file for writing: $!\n";
for my $edge (sort keys %graph) {
    print {$gr} "$edge\n";
}
close($gr) or die "Cannot close $graph_file: $!\n";

# Record an edge "parent->child" in the graph hash for every combination of
# the comma-separated ids in $parent_ids and $child_ids.
#   $graph_ref  - reference to the edge hash to update
#   $parent_ids - id field holding one or more comma-separated parent ids
#   $child_ids  - id field holding one or more comma-separated child ids
sub add_edges {
    my ($graph_ref, $parent_ids, $child_ids) = @_;

    my @parents  = split /\s*,\s*/, $parent_ids;
    my @children = split /\s*,\s*/, $child_ids;

    for my $parent (@parents) {
        for my $child (@children) {
            $graph_ref->{ $parent . "->" . $child } = 1;
        }
    }
    return;
}