#!/bin/sh
# Greedy pattern-discovery driver.
#
# Usage: <this script> FASTA_FILE
#
# Copies FASTA_FILE to ./sequences and then repeats, until ./sequences has
# no lines left:
#   1. run RUNPRATT.sh (looked up on PATH; not defined here);
#   2. run GREEDY.sh (looked up on PATH) and append its stdout to
#      ./greedyoutput;
#   3. move the sequences.*.pat files of this round into a directory named
#      after the line count ./sequences had when the round started.
#
# All work files are created in the current directory.
#
# Exit status: 0 when ./sequences was emptied, 1 when the input could not
# be copied or a round removed nothing, 2 on a usage error.

#ulimit -v 100000
#ulimit -s 100000

# Print the number of lines in file $1 (prints nothing if it is unreadable).
count_lines() {
  wc -l "$1" | awk '{ print $1 }'
}

if [ "$#" -lt 1 ]; then
  echo "usage: $0 FASTA_FILE" >&2
  exit 2
fi

# Start from an empty result file. Truncating (instead of rm + touch) does
# not print an error when the file does not exist yet.
: > greedyoutput

# Quoted "$@" keeps a path containing blanks in one piece. A failed copy is
# fatal: continuing would silently process a stale ./sequences left behind
# by an earlier run.
if ! cp -- "$@" sequences; then
  echo "$0: cannot copy input to ./sequences" >&2
  exit 1
fi

size=$(count_lines sequences)
while [ "${size:-0}" -ge 1 ]; do
  RUNPRATT.sh
  # pratt fasta sequences < s
  GREEDY.sh >> greedyoutput

  # Archive the pattern files of this round under the starting line count.
  rm -rf -- "$size"
  mkdir -- "$size"
  mv -- sequences.*.pat "$size"

  previous=$size
  size=$(count_lines sequences)

  # Every round must shrink ./sequences. If it did not (helper missing from
  # PATH, or nothing was selected for removal) the next round would see the
  # same input again, so stop instead of looping forever.
  if [ "${size:-0}" -ge "$previous" ]; then
    echo "$0: no sequences were removed in this round; giving up" >&2
    exit 1
  fi
done
#!/bin/sh
# One greedy selection step (presumably installed as GREEDY.sh -- the name
# is not visible in this script; confirm against the caller).
#
# Input files (current directory):
#   prattoutput  candidate lines; the line that comes first under
#                `sort -nr` is the one selected
#   sequences    FASTA-style file; a record starts at each line that begins
#                with ">"
#
# Fields 4..NF of the selected line are taken as 0-based record numbers in
# ./sequences. Those records are removed from ./sequences.
#
# stdout: the header line of every removed record, each followed by one
#         blank, then fields 1-3 of the selected line and a newline.
#
# When prattoutput is missing or has no complete line, the remaining
# content of ./sequences is printed and the file is emptied.

size=0
if [ -f prattoutput ]; then
  size=$(wc -l prattoutput | awk '{ print $1 }')
fi

if [ "${size:-0}" -ge 1 ]; then
  sort -nr prattoutput | gawk '
    BEGIN {
      seqfile = "sequences"
      tmpfile = "newsequences"
      kept = 0
    }

    # Only the top-ranked line is used; all later lines are ignored.
    NR == 1 {
      for (i = 4; i <= NF; i++)
        drop[$i] = 1

      idx = -1      # record counter; lines before the first header get -1
      while ((getline line < seqfile) > 0) {
        if (line ~ /^>/) {
          idx++
          # The header is data, never a format string: a "%" inside it
          # must be printed literally.
          if (idx in drop)
            printf "%s ", line
        }
        if (!(idx in drop)) {
          print line > tmpfile
          kept = 1
        }
      }
      close(seqfile)

      print $1, $2, $3
    }

    END {
      if (kept) {
        # Make sure every kept line is on disk before the rename.
        close(tmpfile)
        system("mv " tmpfile " " seqfile)
      } else {
        # Nothing survived: leave an empty sequences file behind.
        system("rm " seqfile)
        printf "" > seqfile
      }
    }
  '
else
  # No candidate left: emit whatever remains, then empty the file.
  cat sequences
  : > sequences
fi
#!/bin/sh
# Pratt driver (presumably installed as RUNPRATT.sh -- the name is not
# visible in this script; confirm against the caller).
#
# Counts the records in ./sequences and, for a decreasing series of
# coverage thresholds K (starting at the record count, shrinking by
# int(K/5) but at least 1 per round, stopping below 4):
#   - writes an answer file ./s for the pratt menu with K as the "m" value,
#   - appends a "Call Pratt on file ..." marker line to ./prattoutput,
#   - runs RUNPRATTONCE.tcsh (looked up on PATH),
#   - appends every line of ./mdl.special (presumably written by pratt;
#     a missing file is skipped) to ./prattoutput.
# Afterwards ./prattoutput is copied to ./prattoutput.<record count>.
#
# stdout: one diagnostic line "K step int(K/10+0.5)" per round.
#
# With fewer than 4 records no round runs and no prattoutput is produced.

# Records are identified by a leading ">", the same rule the record-removal
# step uses (/^>/), so both scripts agree on the numbering.
size=$(grep -c '^>' sequences)

# -f: the file is legitimately absent on the first run.
rm -f prattoutput

# NOTE: the awk program is single-quoted for the shell, so it must not
# contain a single quote anywhere, comments included.
gawk -v nseq="$size" '
  BEGIN {
    K = NOFSEQ = nseq      # number of sequences in the sequences file
    step = int(K / 10 + 0.5)
    rounds = 0

    for ( ; K >= 4; K = K - (int(K / 5) ? int(K / 5) : 1)) {
      print K, step, int(K / 10 + 0.5)

      # Answers fed to pratt on stdin, one per line: option letter, then
      # its value (m = K, w = 10, h = 100, i = 0.1; z and x as given).
      # TODO confirm the meaning of "z" and "x" against the pratt menu.
      print "m\n" K "\nw\n10\n\nz\n2.0\nh\n100\ni\n0.1\nx\n\n" > "s"
      close("s")           # must be complete on disk before pratt reads it

      print "Call Pratt on file with " NOFSEQ " sequences, covering at least " K " of them" > "prattoutput"
      system("RUNPRATTONCE.tcsh")

      while ((getline < "mdl.special") > 0)
        print > "prattoutput"
      close("mdl.special") # so the next round re-reads it from the start

      rounds++
    }

    # Only keep a per-count copy when at least one round produced output;
    # flush our own writes first so the copy is complete.
    if (rounds > 0) {
      close("prattoutput")
      system("cp prattoutput prattoutput." NOFSEQ)
    }
  }
'
#!/bin/tcsh
# Runs pratt once on ./sequences, answering its prompts from the file ./s.
# (Presumably installed as RUNPRATTONCE.tcsh -- the name is not visible in
# this script; confirm against the caller.)

# Lift every resource limit for this shell and the programs it starts.
unlimit
# Then switch core dumps off again. This has to come after unlimit, which
# would otherwise remove the core-size limit as well.
limit coredumpsize 0
# "fasta" is presumably the input-format argument and "sequences" the input
# file -- verify against the pratt usage text. stdin is taken from ./s.
pratt fasta sequences < s
M: Min. number of sequences            10
B: nr of symbols in Block structure    20
S: nr of symbols in first Search       20
R: Refinement                          on
U: fUll refinement                     off
I: minimum Info contents               0.1
N: max Number of flexibilities         2
F: max Flexibility                     2
P: max flex Product                    10
Y: restricted flexibilitY              on
W: max Wildcard length                 10
L: max Length                          50
C: max num of Components               50
H: max length Hit list                 50
A: max number Alignments               50
O: filename Output patterns            sequences.10.pat
0: Alignment input flag                off
2: Use short seq to guide search       on
4: Sloppyness in match:                0
5: Print Motifs in Sequences           on
6: Ratio for printing                  10
D: Diagnosticity analysis              off
8: Quotient increase threshold         5
9: Cardinality quotient                3
T: Input tree                          off
#: Input Dist                          off
Z: braZma values: c1 8.00 c2 8.00 c3 50.00
Page compiled by: Inge Jonassen.