-
Notifications
You must be signed in to change notification settings - Fork 1
strutil
Bruce Mitchener edited this page Jan 6, 2014
·
1 revision
;; ==== START OF strutil.goo ==== ;; ;; Copyright 2002 by Neelakantan Krishnaswami ;; ;; Permission is hereby granted, free of charge, to any person obtaining ;; a copy of this software and associated documentation files (the ;; "Software"), to deal in the Software without restriction, including ;; without limitation the rights to use, copy, modify, merge, publish, ;; distribute, sublicense, and/or sell copies of the Software, and to ;; permit persons to whom the Software is furnished to do so, subject to ;; the following conditions: ;; ;; The above copyright notice and this permission notice shall be ;; included in all copies or substantial portions of the Software. ;; ;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR ;; OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ;; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ;; OTHER DEALINGS IN THE SOFTWARE. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Author: Neel Krishnaswami <[email protected]> ;; Version: 0.3 ;; Synopsis: This library exports several functions and methods for ;; processing strings. The functions here were inspired by ;; the Python string module and Java's String class. ;; ;; o find-str (tgt|<str> pat|<str> rest|... => (t? <int>)) ;; - tgt is the target string to search in ;; - pat is the pattern string to be matched ;; - There is one optional argument, pos, which says where to ;; start searching ;; ;; The return value is the integer index of the beginning of the ;; first matching substring of tgt. On a match failure it returns ;; #f. ;; ;; o replace-str (s|<str> pat|<str> rpl|<str> => <str>) ;; - s is the string to search in ;; - pat is the pattern string to search for ;; - rpl is the replacement text ;; ;; The return value is a new <str>. If no ;; ;; o space? (<chr> => <log>) ;; returns #t if the character is a space character. ;; ;; o punct? (<chr> => <log>) ;; #t if the character is a printing non-alphabetic, non-digit character. ;; ;; o print? (<chr> => <log>) ;; #t if the character is a alphabetic, numeric, or punctuation character. ;; ;; o cap (<str> => <str>) ;; Capitalize the first letter of the string, if it is alphabetic. ;; ;; o capwords (<str> => <str>) ;; Break the string into space separated pieces, and then capitalize ;; the first letter of each piece, if it's alphabetic. ;; ;; o to-upper (<str> => <str>) ;; Make every alphabetic character in a string uppercase. ;; ;; o to-lower (<str> => <str>) ;; Make every alphabetic character in a string lowercase. ;; ;; o swapcase (<str> => <str>) ;; Switch the case of every alphabetic character in a string ;; ;; o ltrim (<str> => <str>) ;; Remove the whitespace from the left side of a string. ;; ;; o rtrim (<str> => <str>) ;; Remove the whitespace from the right side of a string. ;; ;; o translate (src|<str> from|<str> to|<str>) ;; Returns a copy of src, such that each of its characters have been ;; mapped through a translation table defined by from and to. ; (use goo) ; ;; Boyer-Moore-Sunday string search. There's an ACM article describing ;; it, but I can't seem to dig up the reference. Basically you build a ;; shift table to decide how far to skip ahead when a match fails. ; (dv alpha-size 256) ; (df make-vec (size|<int> init|<any> => <vec>) (def v (fab <vec> size)) (for ((i (below size))) (set [[v i] init)) v) ; (df build-shift-vector (pat|<str> => <vec>) (def m (len pat)) (def v (make-vec alpha-size (+ m 1))) (for ((i (below m))) (set [[v (as <int> [[pat i])] (- m i))) v) ; (df find-str (tgt|<str> pat|<str> rest|... => (t? <int>)) (def pos (cond ((= (len rest) 0) 0) ((= (len rest) 1) [[rest 0]) (#t (error "find: takes only 1 optional argument")))) (def v (build-shift-vector pat)) (def m (len pat)) (def n (len tgt)) (def m* (- m 1)) (def n* (- n 1)) (rep loop ((p-ind 0) (t-ind pos) (i pos)) (cond ((> t-ind n*) #f) ((> p-ind m*) #f) ((= [[pat p-ind] [[tgt t-ind]) (if (= p-ind m*) i (loop (+ p-ind 1) (+ t-ind 1) i))) ((> (+ m i) n*) #f) (#t (let ((i* (+ i [[v (as <int> [[tgt (+ i m)])]))) (loop 0 i* i*)))))) ; ;; String replacement. This doesn't use any fancy algorithms; it just ;; uses string ports to build up the new string in linear time. ; (df replace-str (s|<str> pat|<str> rpl|<str> => <str>) (def s-port (open <str-out-port> "")) (def n (len s)) (def m (len pat)) (loc ((loop (s-ind next-match) (if (>= s-ind n) (port-contents s-port) (if (= s-ind next-match) (seq (puts s-port rpl) (loop (+ s-ind m) (find-str s pat (+ s-ind m)))) (seq (put s-port [[s s-ind]) (loop (+ s-ind 1) next-match)))))) (loop 0 (find-str s pat 0)))) ; ;; The definition of the foo? functions could probably be cleaned up ;; a bit. ; ;; space? (<chr> => <log>) ;; returns #t if the character is a space character ;; (dm space? (c|<chr> => <log>) (mem? " \f\n\r\t" c)) ; ;; punct? (<chr> => <log>) ;; #t if the character is a printing non-alphabetic, non-digit character (dm punct? (c|<chr> => <log>) (mem? "~`!@#$%^&*()_+-=[]{}\\|;:'\"<,>./?" c)) ; ;; print? (<chr> => <log>) ;; #t if the character is a alphabetic, numeric, or punctuation character (dm print? (c|<chr> => <log>) (or (alpha? c) (digit? c) (punct? c))) ; ;; cap (<str> => <str>) ;; Capitalize the first letter of the string, if it is alphabetic. (dm cap (src|<str> => <str>) (def first? #t) (map (fun (c) (if first? (seq (set first? #f) (to-upper c)) c)) src)) ; ;; capwords (<str> => <str>) ;; Break the string into space separated pieces, and then capitalize ;; the first letter of each piece, if it's alphabetic. (dm capwords (src|<str> => <str>) (def s-port (open <str-out-port> "")) (def in-word? #f) (for ((i (below (len src)))) (def c [src i]) (if in-word? (seq (if (space? c) (set in-word? #f)) (put s-port c)) (seq (if (not (space? c)) (set in-word? #t)) (put s-port (to-upper c))))) (port-contents s-port)) ; ;; to-upper (<str> => <str>) ;; Make every alphabetic character in a string uppercase. (dm to-upper (src|<str> => <str>) (map to-upper src)) ; ;; to-lower (<str> => <str>) ;; Make every alphabetic character in a string lowercase. (dm to-lower (src|<str> => <str>) (map to-lower src)) ; ;; swapcase (<str> => <str>) ;; Switch the case of every alphabetic character in a string ;; from lowercase to uppercase, and from uppercase to lowercase. (dm swapcase (src|<str> => <str>) (map (fun (c) (cond ((upper? c) (to-lower c)) ((lower? c) (to-upper c)) (#t c))) src)) ; ;; ltrim (<str> => <str>) ;; Remove the whitespace from the left side of a string. (dm ltrim (src|<str> => <str>) (def n (len src)) (def i (rep loop ((i 0)) (if (and (space? [src i]) (< i n)) (loop (+ i 1)) i))) (sub* src i)) ; ;; rtrim (<str> => <str>) ;; Remove the whitespace from the right side of a string. (dm rtrim (src|<str> => <str>) (def n (len src)) (def i (rep loop ((i n)) (if (and (> i 0) (space? [src (- i 1)]) ) (loop (- i 1)) i))) (sub src 0 i)) ; ;; translate (src|<str> from|<str> to|<str>) ;; Returns a copy of src, such that each of its characters have been ;; mapped through a translation table defined by from and to. (dm translate (src|<str> from|<str> to|<str>) (if (~= (len from) (len to)) (error "translate: from pattern not the same length as to-pattern.")) (def trans (fab <vec> 256)) (for ((i (below 256))) (set [trans i] (as <chr> i))) (for ((i (below (len from)))) (set [trans (as <int> [from i])] [to i])) (map {c . [trans (as <int> c)]} src)) ; (export find-str) (export replace-str) (export space?) (export punct?) (export print?) (export cap) (export capwords) ;; (export to-upper) ; just added a method to to-upper ;; (export to-lower) ; just added a method to to-lower (export swapcase) (export ltrim) (export rtrim) (export translate) ; ;; ==== END OF strutil.goo ====