#!/usr/bin/perl -w
#
# For every ORF found in a gene association file, print the sorted, unique
# list of its GO classes that are listed in a "process" ID file, skipping the
# "unknown process" class GO:0000004.
#
# Usage:
#   script [process_id_file [association_file]]
#
# Both arguments are optional and default to the file names below, so running
# the script without arguments reads "id_description_go_process.txt" and
# "gene_association.sgd" from the current directory.
#
# Output (STDOUT), one line per ORF, sorted by ORF name:
#   <orf> TAB <class> <class> ...
#
# NOTE(review): the column positions used below (2 = gene, 4 = GO class,
# 10 = synonyms) are taken as-is from the original script; presumably they
# follow the GO annotation file layout -- confirm against the input files.

use strict;
use warnings;

my $process_file     = @ARGV ? shift @ARGV : "id_description_go_process.txt";
my $association_file = @ARGV ? shift @ARGV : "gene_association.sgd";
# An older snapshot that was used at some point:
#   gene_association.sgd.2003-12-12.txt

# Class ID that stands for "unknown process"; never recorded.
my $UNKNOWN_PROCESS = "GO:0000004";

# --- Load the set of class IDs that count as "process" ----------------------
# Each line is "<id> <description>"; only the first whitespace-delimited
# field (the ID) is used.
my %is_process;
open(my $process_fh, '<', $process_file)
    or die "Cannot open $process_file: $!\n";
while (my $line = <$process_fh>) {
    chomp $line;
    my ($sid) = split /\s+/, $line, 2;

    # Blank lines (and lines starting with whitespace) yield no usable ID.
    next unless defined $sid && length $sid;
    $is_process{$sid} = 1;
}
close($process_fh);

# --- Collect process classes per ORF ----------------------------------------
my %classes_of;    # ORF name => [ class IDs, possibly with duplicates ]
open(my $assoc_fh, '<', $association_file)
    or die "Cannot open $association_file: $!\n";
while (my $line = <$assoc_fh>) {
    next if $line =~ /^!/;    # lines starting with "!" are skipped
    chomp $line;

    my @data  = split /\t/, $line;
    my $class = $data[4];
    my $gene  = $data[2];

    # split() drops trailing empty fields, so the synonym column may be
    # missing entirely; treat that the same as an empty synonym list.
    my $syn = defined $data[10] ? $data[10] : "";

    # The ORF name is the first "|"-separated synonym; when there are no
    # synonyms at all, fall back to the gene name.
    my $orf;
    if ($syn =~ /\|/) {
        ($orf) = split /\|/, $syn, 2;
    }
    elsif ($syn eq "") {
        $orf = $gene;
    }
    else {
        $orf = $syn;
    }

    # Short or malformed lines cannot be attributed to a class or an ORF.
    next unless defined $class && length $class;
    next unless defined $orf;

    # Only record the class if it is a process and not "unknown process".
    if (exists $is_process{$class} && $class ne $UNKNOWN_PROCESS) {
        push @{ $classes_of{$orf} }, $class;
    }
}
close($assoc_fh);

# --- Remove duplicate classes and print -------------------------------------
# ORFs are ordered by their original name (before any hyphen is removed).
for my $orf (sort keys %classes_of) {
    my %seen;
    $seen{$_} = 1 for @{ $classes_of{$orf} };
    my @unique_classes = sort keys %seen;

    # For names that look like Y + two capitals + three digits, drop the
    # first hyphen in the printed name.
    my $label = $orf;
    if ($label =~ /^Y[A-Z]{2}[0-9]{3}/) {
        $label =~ s/-//;
    }

    print "$label\t", join(" ", @unique_classes), "\n";
}