Skip to content
Bruce Mitchener edited this page Jan 6, 2014 · 1 revision
;; ==== START OF strutil.goo ====

;;
;; Copyright 2002 by Neelakantan Krishnaswami
;;
;; Permission is hereby granted, free of charge, to any person obtaining
;; a copy of this software and associated documentation files (the
;; "Software"), to deal in the Software without restriction, including
;; without limitation the rights to use, copy, modify, merge, publish,
;; distribute, sublicense, and/or sell copies of the Software, and to
;; permit persons to whom the Software is furnished to do so, subject to
;; the following conditions:
;;
;; The above copyright notice and this permission notice shall be
;; included in all copies or substantial portions of the Software.
;;
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
;; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
;; OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
;; ARISING FROM, OUT OF OR IN CONNECTION WITH THE  SOFTWARE OR THE USE OR
;; OTHER DEALINGS IN THE SOFTWARE.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Author: Neel Krishnaswami <[email protected]>
;; Version: 0.3
;; Synopsis: This library exports several functions and methods for
;;           processing strings. The functions here were inspired by
;;           the Python string module and Java's String class.
;;
;; o find-str (tgt|<str> pat|<str> rest|... => (t? <int>))
;;   - tgt is the target string to search in
;;   - pat is the pattern string to be matched
;;   - There is one optional argument, pos, which says where to
;;     start searching
;;
;;   The return value is the integer index of the beginning of the
;;   first matching substring of tgt. On a match failure it returns
;;   #f.
;;
;; o replace-str (s|<str> pat|<str> rpl|<str> => <str>)
;;   - s is the string to search in
;;   - pat is the pattern string to search for
;;   - rpl is the replacement text
;;
;;   The return value is a new <str>. If no
;;
;; o space? (<chr> => <log>)
;;   returns #t if the character is a space character.
;;
;; o punct? (<chr> => <log>)
;;   #t if the character is a printing non-alphabetic, non-digit character.
;;
;; o print? (<chr> => <log>)
;;   #t if the character is a alphabetic, numeric, or punctuation character.
;;
;; o cap (<str> => <str>)
;;   Capitalize the first letter of the string, if it is alphabetic.
;;
;; o capwords (<str> => <str>)
;;   Break the string into space separated pieces, and then capitalize
;;   the first letter of each piece, if it's alphabetic.
;;
;; o to-upper (<str> => <str>)
;;   Make every alphabetic character in a string uppercase.
;;
;; o to-lower (<str> => <str>)
;;   Make every alphabetic character in a string lowercase.
;;
;; o swapcase (<str> => <str>)
;;   Switch the case of every alphabetic character in a string
;;
;; o ltrim (<str> => <str>)
;;   Remove the whitespace from the left side of a string.
;;
;; o rtrim (<str> => <str>)
;;   Remove the whitespace from the right side of a string.
;;
;; o translate (src|<str> from|<str> to|<str>)
;;   Returns a copy of src, such that each of its characters have been
;;   mapped through a translation table defined by from and to.
;
(use goo)
;
;; Boyer-Moore-Sunday string search. There's an ACM article describing
;; it, but I can't seem to dig up the reference. Basically you build a
;; shift table to decide how far to skip ahead when a match fails.
;
(dv alpha-size 256)
;
(df make-vec (size|<int> init|<any> => <vec>)
  (def v (fab <vec> size))
  (for ((i (below size)))
    (set [[v i] init))
  v)
;
(df build-shift-vector (pat|<str> => <vec>)
  (def m (len pat))
  (def v (make-vec alpha-size (+ m 1)))
  (for ((i (below m)))
    (set [[v (as <int> [[pat i])] (- m i)))
  v)
;
(df find-str (tgt|<str> pat|<str> rest|... => (t? <int>))
  (def pos (cond
             ((= (len rest) 0) 0)
             ((= (len rest) 1) [[rest 0])
             (#t (error "find: takes only 1 optional argument"))))
  (def v (build-shift-vector pat))
  (def m (len pat))
  (def n (len tgt))
  (def m* (- m 1))
  (def n* (- n 1))
  (rep loop ((p-ind 0) (t-ind pos) (i pos))
    (cond
      ((> t-ind n*) #f)
      ((> p-ind m*) #f)
      ((= [[pat p-ind] [[tgt t-ind])
       (if (= p-ind m*)
           i
           (loop (+ p-ind 1) (+ t-ind 1) i)))
      ((> (+ m i) n*) #f)
      (#t
       (let ((i* (+ i [[v (as <int> [[tgt (+ i m)])])))
         (loop 0 i* i*))))))
;
;; String replacement. This doesn't use any fancy algorithms; it just
;; uses string ports to build up the new string in linear time.
;
(df replace-str (s|<str> pat|<str> rpl|<str> => <str>)
  (def s-port (open <str-out-port> ""))
  (def n (len s))
  (def m (len pat))
  (loc ((loop (s-ind next-match)
              (if (>= s-ind n)
                  (port-contents s-port)
                  (if (= s-ind next-match)
                      (seq (puts s-port rpl)
                           (loop (+ s-ind m)
                                 (find-str s pat (+ s-ind m))))
                      (seq (put s-port [[s s-ind])
                           (loop (+ s-ind 1) next-match))))))
    (loop 0 (find-str s pat 0))))
;
;; The definition of the foo? functions could probably be cleaned up
;; a bit.
;
;; space? (<chr> => <log>)
;; returns #t if the character is a space character
;;
(dm space? (c|<chr> => <log>)
  (mem? " \f\n\r\t" c))
;
;; punct? (<chr> => <log>)
;; #t if the character is a printing non-alphabetic, non-digit character
(dm punct? (c|<chr> => <log>)
  (mem? "~`!@#$%^&*()_+-=[]{}\\|;:'\"<,>./?" c))
;
;; print? (<chr> => <log>)
;; #t if the character is a alphabetic, numeric, or punctuation character
(dm print? (c|<chr> => <log>)
  (or (alpha? c)
      (digit? c)
      (punct? c)))
;
;; cap (<str> => <str>)
;; Capitalize the first letter of the string, if it is alphabetic.
(dm cap (src|<str> => <str>)
  (def first? #t)
  (map (fun (c)
         (if first?
             (seq
               (set first? #f)
               (to-upper c))
             c))
       src))
;
;; capwords (<str> => <str>)
;; Break the string into space separated pieces, and then capitalize
;; the first letter of each piece, if it's alphabetic.
(dm capwords (src|<str> => <str>)
  (def s-port (open <str-out-port> ""))
  (def in-word? #f)
  (for ((i (below (len src))))
    (def c [src i])
    (if in-word?
        (seq
          (if (space? c)
              (set in-word? #f))
          (put s-port c))
        (seq
          (if (not (space? c))
              (set in-word? #t))
          (put s-port (to-upper c)))))
  (port-contents s-port))
;
;; to-upper (<str> => <str>)
;; Make every alphabetic character in a string uppercase.
(dm to-upper (src|<str> => <str>)
  (map to-upper src))
;
;; to-lower (<str> => <str>)
;; Make every alphabetic character in a string lowercase.
(dm to-lower (src|<str> => <str>)
  (map to-lower src))
;
;; swapcase (<str> => <str>)
;; Switch the case of every alphabetic character in a string
;; from lowercase to uppercase, and from uppercase to lowercase.
(dm swapcase (src|<str> => <str>)
  (map (fun (c)
         (cond
           ((upper? c) (to-lower c))
           ((lower? c) (to-upper c))
           (#t c)))
       src))
;
;; ltrim (<str> => <str>)
;; Remove the whitespace from the left side of a string.
(dm ltrim (src|<str> => <str>)
  (def n (len src))
  (def i
    (rep loop ((i 0))
      (if (and (space? [src i]) (< i n))
          (loop (+ i 1))
          i)))
  (sub* src i))
;
;; rtrim (<str> => <str>)
;; Remove the whitespace from the right side of a string.
(dm rtrim (src|<str> => <str>)
  (def n (len src))
  (def i
    (rep loop ((i n))
      (if (and (> i 0) (space? [src (- i 1)]) )
          (loop (- i 1))
          i)))
  (sub src 0 i))
;
;; translate (src|<str> from|<str> to|<str>)
;; Returns a copy of src, such that each of its characters have been
;; mapped through a translation table defined by from and to.
(dm translate (src|<str> from|<str> to|<str>)
  (if (~= (len from) (len to))
      (error "translate: from pattern not the same length as to-pattern."))
  (def trans (fab <vec> 256))
  (for ((i (below 256)))
    (set [trans i] (as <chr> i)))
  (for ((i (below (len from))))
    (set [trans (as <int> [from i])] [to i]))
  (map {c . [trans (as <int> c)]} src))
;
(export find-str)
(export replace-str)
(export space?)
(export punct?)
(export print?)
(export cap)
(export capwords)
;; (export to-upper) ; just added a method to to-upper
;; (export to-lower) ; just added a method to to-lower
(export swapcase)
(export ltrim)
(export rtrim)
(export translate)
;
;; ==== END OF strutil.goo ====
Clone this wiki locally