#!/usr/bin/perl -w
use strict;
use warnings;

# Goal  : 1. Format "go_process_graph_hy.txt" into "process_graph_hy.txt";
#         2. Format "go_process_orf.txt" into "process_terms_for_orf_hy.txt".
# Author: Haiyuan Yu
# Date  : 04/19/2006
#
# All four files are read from / written to the current working directory.

sub main;

#my $hostname = `hostname -s`;
my $hostname = `hostname`;

# NOTE(review): $HOME is computed here but is not referenced by any code
# visible in this script; it is kept in case something else relies on it.
my $HOME = ( $hostname =~ /^Legolas/ )
    ? "/cygdrive/d/Work"
    : "/home/bh1/haiyuan/Linux_copy";

# Term at which the graph traversal starts.
my $origin = "GO:0008150";

main();

# Entry point: builds the process graph file, then the per-ORF term file.
# Takes no arguments and returns nothing; dies if any file cannot be
# opened or a written file cannot be closed cleanly.
sub main {
    my $graph_in  = "go_process_graph_hy.txt";
    my $graph_out = "process_graph_hy.txt";
    my $orf_in    = "go_process_orf.txt";
    my $orf_out   = "process_terms_for_orf_hy.txt";

    my $children_of = _read_graph($graph_in);
    my ( $parents_of, $is_reachable )
        = _write_process_graph( $children_of, $graph_out );
    _write_terms_for_orfs( $orf_in, $orf_out, $parents_of, $is_reachable );

    return;
}

# Reads "parent->child" lines from $path and returns a hash ref of forward
# edges: $children_of->{$parent}{$child} = 1.
# Edges with GO:0000004 at either end are dropped, and lines that do not
# contain both a parent and a child are skipped.
sub _read_graph {
    my ($path) = @_;

    my $excluded_term = "GO:0000004";
    my %children_of;

    open my $in, '<', $path
        or die "Unable to open graph file '$path': $!";
    while ( my $line = <$in> ) {
        chomp $line;
        my ( $parent, $child ) = split /->/, $line;
        next unless defined $parent && defined $child;
        next if $parent eq $excluded_term || $child eq $excluded_term;
        $children_of{$parent}{$child} = 1;
    }
    close $in;

    return \%children_of;
}

# Walks the graph breadth-first from $origin and writes every edge found to
# $path as a single line: "origin->$origin" followed by " parent->child"
# for each edge, then a newline.
# Returns two hash refs:
#   - backward edges of the reachable sub-graph ($parents_of->{$child}{$parent});
#   - the set of reachable terms, $origin included.
sub _write_process_graph {
    my ( $children_of, $path ) = @_;

    my ( %parents_of, %is_reachable, %expanded );

    open my $out, '>', $path
        or die "Unable to open '$path' for writing: $!";
    print {$out} "origin->" . $origin;

    $is_reachable{$origin} = 1;
    my @frontier = ($origin);
    while (@frontier) {
        my @next_frontier;
        for my $term (@frontier) {

            # Each term is expanded once, so each edge is printed once.
            next if $expanded{$term}++;

            # "|| {}" keeps the lookup from autovivifying entries for
            # terms without children; sorting makes the output stable.
            for my $child ( sort keys %{ $children_of->{$term} || {} } ) {
                $parents_of{$child}{$term} = 1;
                $is_reachable{$child}      = 1;
                print {$out} " " . $term . "->" . $child;
                push @next_frontier, $child unless $expanded{$child};
            }
        }
        @frontier = @next_frontier;
    }
    print {$out} "\n";
    close $out or die "Error closing '$path': $!";

    return ( \%parents_of, \%is_reachable );
}

# Reads whitespace-separated lines "ORF term term ..." from $in_path.
# For every ORF with at least one term in %$is_reachable, writes
# "ORF<TAB>term term ...\n" to $out_path, listing those terms together with
# all of their ancestors; ORFs without such a term produce no output line.
sub _write_terms_for_orfs {
    my ( $in_path, $out_path, $parents_of, $is_reachable ) = @_;

    open my $in, '<', $in_path
        or die "Unable to open ORF file '$in_path': $!";
    open my $out, '>', $out_path
        or die "Unable to open '$out_path' for writing: $!";

    while ( my $line = <$in> ) {
        chomp $line;
        my ( $orf, @terms ) = split /\s+/, $line;

        # Membership is tested against the full reachable set (which holds
        # $origin too), so the result does not depend on which ORFs were
        # processed earlier.
        my @seeds = grep { $is_reachable->{$_} } @terms;
        next unless @seeds;

        my @listed = _terms_with_ancestors( \@seeds, $parents_of );
        print {$out} $orf . "\t" . join( " ", @listed ) . "\n";
    }

    close $in;
    close $out or die "Error closing '$out_path': $!";

    return;
}

# Returns the seed terms plus all their ancestors, each listed once, in
# breadth-first order of first visit (seeds taken in the order given,
# parents of a term visited in sorted order).
sub _terms_with_ancestors {
    my ( $seeds, $parents_of ) = @_;

    my ( %seen, @ordered );
    for my $seed ( @{$seeds} ) {
        my @frontier = ($seed);
        while (@frontier) {
            my @next_frontier;
            for my $term (@frontier) {

                # A term already visited has had its parents queued
                # before, so revisiting it cannot add anything new.
                next if $seen{$term}++;
                push @ordered, $term;
                push @next_frontier,
                    grep { !$seen{$_} }
                    sort keys %{ $parents_of->{$term} || {} };
            }
            @frontier = @next_frontier;
        }
    }

    return @ordered;
}