tglogo.png
0 LIKES

Snippet


Words Frequencies by gennarino

In Category General IRC Posted by Snippets On 12/06/23

Words Frequencies using hash table!
I wrote this script to learn hash tables.
Copy the source into your script editor and you will get a new menu entry called Words.
The Top10 and Bottom10 aliases were taken from the official mIRC documentation.
;
; Collects words and frequencies. Last Modified: January, 19, 2021
; 
;                    By gennarino
;
; Creates Hash table Words => Frequencies
;

; START handler: makes sure the Words hash table exists and, if a saved
; copy is present next to this script, loads it back into the table.
On *:Start:{
  if (!$hget(Words)) { hmake Words 100 }
  ; Build the path once; it is quoted for /hload so that a script
  ; directory containing spaces is not split into <filename> [section].
  var %file = $+($scriptdir,Words.hsh)
  if ($isfile(%file)) { hload Words $qt(%file) }
}

; EXIT handler: persists the Words table next to this script.
; -o overwrites the previous save file; the path is quoted so a script
; directory containing spaces is passed to /hsave as a single filename.
On *:Exit: { if ($hget(Words)) { hsave -o Words $qt($+($scriptdir,Words.hsh)) } }
; DISCONNECT handler: persists the Words table next to this script.
; -o overwrites the previous save file; the path is quoted so a script
; directory containing spaces is passed to /hsave as a single filename.
On *:Disconnect: { if ($hget(Words)) { hsave -o Words $qt($+($scriptdir,Words.hsh)) } }

;End Hash Table handler

; TEXT handler: counts every word longer than three characters that is
; seen in a channel message.
;  - Messages containing "http" are ignored entirely.
;  - Control codes are stripped and common punctuation is turned into
;    spaces before the message is split into words.
;  - Words => count is kept in the hash table "Words".
;  - The global %single remembers the most recent word that was added to
;    the table for the first time (shown by the "Show LastUnknown" menu
;    entry). It is no longer cleared on every message, so it keeps its
;    value until another new word arrives.
on *:TEXT:*:#: {
  ; Skip sentences containing links
  if (http isin $1-) return

  ; Remove control codes (bold/underline/reverse/italic/colour)
  var %sentence = $strip($1-,buricmo)

  ; Replace punctuation with spaces so it does not stick to the words
  %sentence = $replace(%sentence,#,$chr(32),?,$chr(32),.,$chr(32),!,$chr(32),",$chr(32),',$chr(32),$chr(44),$chr(32),:,$chr(32),$chr(40),$chr(32),$chr(41),$chr(32),/,$chr(32))

  var %total = $numtok(%sentence,32)
  var %k = 1
  var %word
  while (%k <= %total) {
    %word = $gettok(%sentence,%k,32)
    ; Accept only words longer than 3 characters
    if ($len(%word) > 3) {
      ; -m creates the table on the fly if it is missing
      if ($hget(Words,%word)) {
        hinc -m Words %word 1
      }
      else {
        hadd -m Words %word 1
        set %single %word
      }
    }
    inc %k
  }
}

; Popup menu "Words", available in every window type.
;  - Save entries write the table next to this script; -o overwrites the
;    previous file and the path is quoted so directories with spaces work.
;  - The prompts use $$? so that cancelling the input dialog aborts the
;    command instead of running it with an empty argument (previously a
;    cancelled "Prune lowest" still pruned with the default limit).
menu * {

  ?  --- Words --> 
  .-
  .Save Hash Table: .hsave -o Words $qt($+($scriptdir,Words.hsh))
  .Save Ini File: .hsave -io Words $qt($+($scriptdir,Words.ini))
  .Show Top10: .top10
  .Show Bottom10: .bottom10
  .Show LastUnknown: //echo -a Last single word used:  %single
  .Prune lowest: Wprune $$?="Enter Upper limit"
  .Search Word: Wget $$?="Enter word: "

  ; .Print All: .print_All_Words  ; Warning: could hang MIRC
  .-
}

; print_All_Words: echoes every item of the Words table to the active
; window as "<index>) <word> => <count>".
; The loop is bounded by the item count rather than by the truthiness of
; the item name, so a word that evaluates as false no longer ends the
; listing early.
Alias print_All_Words {
  var %total = $hget(Words,0).item
  var %i = 1
  var %word
  echo -a Words Table:
  while (%i <= %total) {
    %word = $hget(Words,%i).item
    echo -a %i $+ ) %word => $hget(Words,%word)
    inc %i
  }
}

; top10: echoes the (up to) ten most frequent words as
; "TOP 10: word (count), word (count), ...".
; How it works: the counts are dumped to top10.txt (data only), /filter
; sorts them numerically in descending order and prefixes each line with
; its original line number, and that number is then used as the item
; index into the Words table.
alias top10 {
  ; Nothing to sort when the table is missing or empty
  if (!$hget(Words,0).item) {
    echo -a TOP 10: no words recorded yet
    return
  }
  hsave -no Words top10.txt
  filter -ffcteun 1 32 top10.txt top10.txt

  ; Never read past the end of the file when fewer than 10 words exist
  var %rows = $lines(top10.txt)
  if (%rows > 10) %rows = 10

  var %i = 1
  var %word
  var %list
  while (%i <= %rows) {
    %word = $hget(Words,$gettok($read(top10.txt,nt,%i),1,32)).item
    ; Comma goes between entries only, not between a word and its count
    %list = %list $+ $iif(%i > 1,$chr(44)) %word ( $+ $hget(Words,%word) $+ )
    inc %i
  }
  echo -a TOP 10: %list
}

; bottom10: echoes the (up to) ten least frequent words as
; "BOTTOM 10: word (count), word (count), ...".
; How it works: the counts are dumped to bottom10.txt (data only),
; /filter sorts them numerically in ascending order and prefixes each
; line with its original line number, and that number is then used as
; the item index into the Words table.
alias bottom10 {
  ; Nothing to sort when the table is missing or empty
  if (!$hget(Words,0).item) {
    echo -a BOTTOM 10: no words recorded yet
    return
  }
  hsave -no Words bottom10.txt
  filter -ffctun 1 32 bottom10.txt bottom10.txt

  ; Never read past the end of the file when fewer than 10 words exist
  var %rows = $lines(bottom10.txt)
  if (%rows > 10) %rows = 10

  var %i = 1
  var %word
  var %list
  while (%i <= %rows) {
    %word = $hget(Words,$gettok($read(bottom10.txt,nt,%i),1,32)).item
    ; Comma goes between entries only, not between a word and its count
    %list = %list $+ $iif(%i > 1,$chr(44)) %word ( $+ $hget(Words,%word) $+ )
    inc %i
  }
  echo -a BOTTOM 10: %list
}
; wdel <word>: removes <word> from the Words table.
; Halts silently when no word is given ($$1). Reports "not found" instead
; of claiming a deletion when the word is not in the table.
alias wdel {
  if ($hget(Words,$$1) == $null) {
    echo 4 -a $1 => not found
    return
  }
  hdel Words $1
  echo 4 -a $1 => deleted
}
; wget <word>: says in the active channel/query how often <word> has been
; counted. Halts silently when no word is given ($$1). When the word is
; not in the table a local notice is echoed instead of doing nothing.
; NOTE(review): the two literal "4"s in the message look like colour
; codes whose Ctrl+K character was lost - confirm against the original.
alias wget {
  var %count = $hget(Words,$$1)
  if (%count) say The word 4 $1 has been used 4 %count times!
  else echo -a The word $1 is not in the Words table
}

; Wprune [limit]: deletes every word whose count is below <limit>
; (default 2 when no limit, or 0, is given), reports how many entries were
; removed and saves the table next to this script.
; The table is walked from the last index down to the first: deleting an
; item while counting upwards shifts the following item into the freed
; index, so the old forward loop skipped entries.
alias Wprune {
  var %limit = $iif($1,$1,2)
  if (%limit !isnum) {
    echo 4 -a Wprune: the limit must be a number
    return
  }
  if (!$hget(Words)) {
    echo 4 -a Wprune: the Words table does not exist
    return
  }

  var %idx = $hget(Words,0).item
  var %pruned = 0
  echo -a You have %idx items in your hash table
  while (%idx > 0) {
    if ($hget(Words,%idx).data < %limit) {
      hdel Words $hget(Words,%idx).item
      inc %pruned
    }
    dec %idx
  }
  echo 4 -a Pruned %pruned entries
  ; -o overwrites the previous save file; the path is quoted so a script
  ; directory containing spaces is passed as a single filename
  hsave -o Words $qt($+($scriptdir,Words.hsh))
}


Comments 0


Please note that on our website we use cookies necessary for the functioning of our website, cookies that optimize the performance. To learn more about our cookies, how we use them and their benefits, please read our Cookie Policy.
I Understand