ユニコード文字列をバイトストリームとして扱うためのパッケージ
タイトル通りのパッケージ。
実装の前に使用例。
;;;; sbcl-1.0.49 ;; 例で使用する文字列(および対応するバイト列) (sb-ext:string-to-octets "下書き") --> #(228 184 139 230 155 184 227 129 141) ;; 作成 (defparameter *in* (octet-stream:make "下書き")) --> *in* ;; 一バイト先読みする (octet-stream:peek *in*) --> 228 ;; 一バイト読み込む (octet-stream:read *in*) --> 228 (octet-stream:read *in*) --> 184 (octet-stream:peek *in*) --> 139 ;; 一バイト読み捨てる (octet-stream:eat *in*) --> #S(OCTET-STREAM::OCTET-STREAM :SRC "下書き" :POS 1 :END 3 :CODE 26360 :OCTET-POS 3 :OCTET-LEN 3) (octet-stream:peek *in*) --> 230 ;; 終端判定 (octet-stream:eos? *in*) --> NIL
実装:
(defpackage octet-stream (:use :common-lisp) (:shadow :common-lisp read peek position) (:export make read peek eos? eat position)) (in-package :octet-stream) ;;;;;;;;;;;;;;;; ;;; declaration (declaim (inline make-octet-stream make eos? octet-length peek read eat position)) ;;;;;;;;;;;;;;;;;;; ;;; type definition (deftype array-index () `(mod ,array-dimension-limit)) (deftype simple-characters () '(simple-array character)) (deftype unicode () `(mod ,char-code-limit)) ;;;;;;;;;;;;;;;; ;;; octet-stream (defstruct octet-stream (src "" :type simple-characters) (pos 0 :type array-index) (end 0 :type array-index) (code 0 :type unicode) (octet-pos 0 :type (mod 5)) (octet-len 0 :type (mod 5))) ;;;;;;;;;;;;;;;;;;;;;; ;;; auxiliary function (defun octet-length (code) (declare (unicode code)) (cond ((< code #x80) 1) ((< code #x800) 2) ((< code #x10000) 3) (t 4))) ;;;;;;;;;;;;;;;;;;;;; ;;; external function (defun position (in) ; 現在位置 (バイト単位ではなく文字単位) (octet-stream-pos in)) (defun make (string &key (start 0) (end (length string))) (declare (simple-characters string) (array-index start end)) (let* ((code (if (= start (length string)) 0 (char-code (char string start)))) (len (octet-length code))) (make-octet-stream :src string :pos start :end end :code code :octet-pos len :octet-len len))) (defun eos? (in) (with-slots (pos end) (the octet-stream in) (= pos end))) ;; 一バイト分先読みする。(eos? in)が真の時に呼び出された場合、返す値は未定義。 (defun peek (in) (with-slots (code octet-pos octet-len) (the octet-stream in) (if (= octet-pos octet-len) (case octet-len (1 code) (2 (+ #b11000000 (ldb (byte 5 6) code))) (3 (+ #b11100000 (ldb (byte 4 12) code))) (t (+ #b11110000 (ldb (byte 3 18) code)))) (+ #b10000000 (ldb (byte 6 (* (the (mod 4) (1- octet-pos)) 6)) code))))) (defun eat (in) (with-slots (src pos code octet-pos octet-len) (the octet-stream in) (decf octet-pos) (when (zerop octet-pos) (incf pos) (unless (eos? in) (setf code (char-code (char src pos)) octet-len (octet-length code) octet-pos octet-len)))) in) (defun read (in) (prog1 (peek in) (eat in)))