From 4db7a32b1c77745e6008dd71e0eb508bcd58d9f8 Mon Sep 17 00:00:00 2001 From: ESL Date: Mon, 27 Mar 2023 18:58:37 -0400 Subject: [PATCH] binary/bytevector i/o --- i.c | 85 +++++++++++++++++++++++++++++++++++++- i.h | 18 +++++++-- s.c | 62 +++++++++++++++++++++++++++- src/s.scm | 119 ++++++++++++++++++++++++++++++++++++++++++------------ 4 files changed, 250 insertions(+), 34 deletions(-) diff --git a/i.c b/i.c index 24dcef9..8b41044 100644 --- a/i.c +++ b/i.c @@ -186,6 +186,7 @@ static void _sck(obj *s) { #define are_fixnums(o1, o2) are_fixnum_objs(o1, o2) #define get_fixnum(o) get_fixnum_unchecked(o) #define is_byte(o) is_byte_obj(o) +#define byte_obj(x) obj_from_fixnum((unsigned char)(x)) #define get_byte(o) ((unsigned char)get_fixnum_unchecked(o)) #define flonum_obj(x) hp_pushptr(dupflonum(x), FLONUM_NTAG) #define is_flonum(o) is_flonum_obj(o) @@ -205,6 +206,8 @@ static void _sck(obj *s) { #define oport_file_obj(fp) hp_pushptr((fp), OPORT_FILE_NTAG) #define iport_string_obj(fp) hp_pushptr((fp), IPORT_STRING_NTAG) #define oport_string_obj(fp) hp_pushptr((fp), OPORT_STRING_NTAG) +#define iport_bytevector_obj(fp) hp_pushptr((fp), IPORT_BYTEVECTOR_NTAG) +#define oport_bytevector_obj(fp) hp_pushptr((fp), OPORT_BYTEVECTOR_NTAG) #define is_iport(o) isiport(o) #define is_oport(o) isoport(o) #define is_box(o) isbox(o) @@ -2850,20 +2853,34 @@ define_instruction(opop) { gonexti(); } -define_instruction(otip) { +define_instruction(oif) { FILE *fp = fopen(stringchars(ac), "r"); if (fp == NULL) fail("can't open input file"); ac = iport_file_obj(fp); gonexti(); } -define_instruction(otop) { +define_instruction(oof) { FILE *fp = fopen(stringchars(ac), "w"); if (fp == NULL) fail("can't open output file"); ac = oport_file_obj(fp); gonexti(); } +define_instruction(obif) { + FILE *fp = fopen(stringchars(ac), "rb"); + if (fp == NULL) fail("can't open binary input file"); + ac = iport_file_obj(fp); + gonexti(); +} + +define_instruction(obof) { + FILE *fp = fopen(stringchars(ac), "wb"); + if (fp == NULL) fail("can't open binary output file"); + ac = oport_file_obj(fp); + gonexti(); +} + define_instruction(ois) { int *d; cks(ac); d = dupstring(stringdata(ac)); @@ -2876,6 +2893,19 @@ define_instruction(oos) { gonexti(); } +define_instruction(oib) { + int *d; unsigned char *p, *e; ckb(ac); + d = dupbytevector(bytevectordata(ac)); + p = bvdatabytes(d), e = p + *d; + ac = iport_bytevector_obj(bvialloc(p, e, d)); + gonexti(); +} + +define_instruction(oob) { + ac = oport_bytevector_obj(newcb()); + gonexti(); +} + define_instruction(cip) { cxtype_iport_t *vt; ckr(ac); vt = iportvt(ac); assert(vt); @@ -2911,6 +2941,19 @@ define_instruction(gos) { gonexti(); } +define_instruction(gob) { + cxtype_oport_t *vt; ckw(ac); + vt = ckoportvt(ac); + if (vt != (cxtype_oport_t *)OPORT_BYTEVECTOR_NTAG && + vt != (cxtype_oport_t *)OPORT_STRING_NTAG) { + ac = eof_obj(); + } else { + cbuf_t *pcb = oportdata(ac); + int len = (int)(pcb->fill - pcb->buf); + ac = bytevector_obj(newbytevector((unsigned char *)pcb->buf, len)); + } + gonexti(); +} define_instruction(rdc) { int c; ckr(ac); @@ -2934,6 +2977,29 @@ define_instruction(rdcr) { gonexti(); } +define_instruction(rd8) { + int c; ckr(ac); + c = iportgetc(ac); + if (unlikely(c == EOF)) ac = eof_obj(); + else ac = byte_obj(c); + gonexti(); +} + +define_instruction(rda8) { + int c; ckr(ac); + c = iportpeekc(ac); + if (unlikely(c == EOF)) ac = eof_obj(); + else ac = byte_obj(c); + gonexti(); +} + +define_instruction(rd8r) { + ckr(ac); + ac = bool_obj(1); /* no portable way to detect hanging? */ + gonexti(); +} + + define_instruction(eofp) { ac = bool_obj(is_eof(ac)); gonexti(); @@ -2959,6 +3025,21 @@ define_instruction(wrs) { gonexti(); } +define_instruction(wr8) { + obj x = ac, y = spop(); ck8(x); ckw(y); + oportputc(get_byte(x), y); + ac = void_obj(); + gonexti(); +} + +define_instruction(wrb) { + obj x = ac, y = spop(); int *d; ckb(x); ckw(y); + d = bytevectordata(x); + oportwrite((char *)bvdatabytes(d), *d, y); + ac = void_obj(); + gonexti(); +} + define_instruction(wrcd) { obj x = ac, y = spop(); ckw(y); oportputcircular(x, y, 1); diff --git a/i.h b/i.h index 7df2e32..0e068df 100644 --- a/i.h +++ b/i.h @@ -459,21 +459,31 @@ declare_instruction(sop, "P11", 0, "current-output-port", declare_instruction(sep, "P12", 0, "current-error-port", '0', AUTOGL) declare_instruction(ipop, "P20", 0, "input-port-open?", '1', AUTOGL) declare_instruction(opop, "P21", 0, "output-port-open?", '1', AUTOGL) -declare_instruction(otip, "P40", 0, "open-input-file", '1', AUTOGL) -declare_instruction(otop, "P41", 0, "open-output-file", '1', AUTOGL) +declare_instruction(oif, "P40", 0, "open-input-file", '1', AUTOGL) +declare_instruction(oof, "P41", 0, "open-output-file", '1', AUTOGL) +declare_instruction(obif, "P42", 0, "open-binary-input-file", '1', AUTOGL) +declare_instruction(obof, "P43", 0, "open-binary-output-file", '1', AUTOGL) declare_instruction(ois, "P50", 0, "open-input-string", '1', AUTOGL) declare_instruction(oos, "P51", 0, "open-output-string", '0', AUTOGL) +declare_instruction(oib, "P52", 0, "open-input-bytevector", '1', AUTOGL) +declare_instruction(oob, "P53", 0, "open-output-bytevector", '0', AUTOGL) declare_instruction(cip, "P60", 0, "close-input-port", '1', AUTOGL) declare_instruction(cop, "P61", 0, "close-output-port", '1', AUTOGL) declare_instruction(fop, "P71", 0, "flush-output-port", '1', AUTOGL) -declare_instruction(gos, "P9", 0, "get-output-string", '1', AUTOGL) +declare_instruction(gos, "P90", 0, "get-output-string", '1', AUTOGL) +declare_instruction(gob, "P91", 0, "get-output-bytevector", '1', AUTOGL) declare_instruction(rdc, "R0\0P10", 0, "read-char", 'u', AUTOGL) declare_instruction(rdac, "R1\0P10", 0, "peek-char", 'u', AUTOGL) declare_instruction(rdcr, "R2\0P10", 0, "char-ready?", 'u', AUTOGL) +declare_instruction(rd8, "R3\0P10", 0, "read-u8", 'u', AUTOGL) +declare_instruction(rda8, "R4\0P10", 0, "peek-u8", 'u', AUTOGL) +declare_instruction(rd8r, "R5\0P10", 0, "u8-ready?", 'u', AUTOGL) declare_instruction(eofp, "R8", 0, "eof-object?", '1', AUTOGL) declare_instruction(eof, "R9", 0, "eof-object", '0', AUTOGL) declare_instruction(wrc, "W0\0P11", 0, "write-char", 'b', AUTOGL) -declare_instruction(wrs, "W1\0P11", 0, "write-string", 'b', AUTOGL) +declare_instruction(wrs, "W1", 0, "%write-string1", '2', AUTOGL) +declare_instruction(wr8, "W2\0P11", 0, "write-u8", 'b', AUTOGL) +declare_instruction(wrb, "W3", 0, "%write-bytevector1", '2', AUTOGL) declare_instruction(wrcd, "W4\0P11", 0, "display", 'b', AUTOGL) declare_instruction(wrcw, "W5\0P11", 0, "write", 'b', AUTOGL) declare_instruction(wrnl, "W6\0P11", 0, "newline", 'u', AUTOGL) diff --git a/s.c b/s.c index 34371b5..263e175 100644 --- a/s.c +++ b/s.c @@ -416,6 +416,22 @@ char *s_code[] = { "%!0.0,'0,${.4,@(y23:%25bytevectors-sum-length)[01}B2,@(y23:%25bytevect" "ors-copy-into!)[12", + "P", "subutf8->string", + "%3P51,${.2,.6,.6,.6,@(y19:write-subbytevector)[04}.0P90,.1P61.0]5", + + "C", 0, + "&0{%3.2,.2,.2,@(y15:subutf8->string)[33}%x,&0{%2.0B3,.2,.2,@(y15:subut" + "f8->string)[23}%x,&0{%1.0B3,'0,.2,@(y15:subutf8->string)[13}%x,&3{|10|" + "21|32%%}@!(y12:utf8->string)", + + "P", "substring->utf8", + "%3P53,${.2,.6,.6,.6,@(y15:write-substring)[04}.0P91,.1P61.0]5", + + "C", 0, + "&0{%3.2,.2,.2,@(y15:substring->utf8)[33}%x,&0{%2.0S3,.2,.2,@(y15:subst" + "ring->utf8)[23}%x,&0{%1.0S3,'0,.2,@(y15:substring->utf8)[13}%x,&3{|10|" + "21|32%%}@!(y12:string->utf8)", + "P", "%apply", "%!2${.2,.5,,#0.0,&1{%2.1u?{.0]2}${.3d,.4a,:0^[02},.1c]2}.!0.0^_1[02},." "2,@(y13:apply-to-list)[32", @@ -503,6 +519,12 @@ char *s_code[] = { "P", "port?", "%1.0P00,.0?{.0]2}.1P01]2", + "C", 0, + "@(y5:port?)@!(y13:textual-port?)", + + "C", 0, + "@(y5:port?)@!(y12:binary-port?)", + "P", "close-port", "%1.0P00?{.0P60}.0P01?{.0P61]1}]1", @@ -518,8 +540,8 @@ char *s_code[] = { "P", "read-line", "%!0P51,.1u?{P10}{.1a},t,,#0.2,.4,.2,&3{%1:2R0,.0R8,.0?{.0}{'(c%0a),.2C" - "=}_1?{.0R8?{.1}{f}?{.0]2}:1P9,:1P61.0]3}'(c%0d),.1C=?{f,:0^[21}:1,.1W0" - "f,:0^[21}.!0.0^_1[31", + "=}_1?{.0R8?{.1}{f}?{.0]2}:1P90,:1P61.0]3}'(c%0d),.1C=?{f,:0^[21}:1,.1W" + "0f,:0^[21}.!0.0^_1[31", "P", "read-substring!", "%4.1,,#0.5,.4,.4,.3,.8,&5{%1:0,.1Istring -;string->utf8 +(define (subutf8->string vec start end) + (let ([p (open-output-string)]) + (write-subbytevector vec start end p) + ; todo: make a single operation: get-final-output-string (can reuse cbuf?) + (let ([s (get-output-string p)]) (close-output-port p) s))) + +(define utf8->string + (case-lambda + [(bvec) (subutf8->string bvec 0 (bytevector-length bvec))] + [(bvec start) (subutf8->string bvec start (bytevector-length bvec))] + [(bvec start end) (subutf8->string bvec start end)])) + +(define (substring->utf8 str start end) + (let ([p (open-output-bytevector)]) + (write-substring str start end p) + ; todo: make a single operation: get-final-output-bytevector (can reuse cbuf?) + (let ([v (get-output-bytevector p)]) (close-output-port p) v))) + +(define string->utf8 + (case-lambda + [(str) (substring->utf8 str 0 (string-length str))] + [(str start) (substring->utf8 str start (string-length str))] + [(str start end) (substring->utf8 str start end)])) ;--------------------------------------------------------------------------------------------- @@ -1060,18 +1081,25 @@ ; (output-port? x) ; (input-port-open? p) ; (output-port-open? p) -; (current-input-port) -; (current-output-port) -; (current-error-port) -; (open-output-string) +; (current-input-port) ; need to be made into a parameter +; (current-output-port) ; need to be made into a parameter +; (current-error-port) ; need to be made into a parameter ; (open-input-file s) +; (open-binary-input-file s) ; (open-output-file x) -; (open-input-string x) -; (close-input-port x) -; (close-output-port x) -; (get-output-string x) +; (open-binary-output-file x) +; (close-input-port p) +; (close-output-port p) +; (open-input-string s) +; (open-output-string) +; (get-output-string p) +; (open-input-bytevector b) +; (open-output-bytevector) +; (get-output-bytevector p) (define (port? x) (or (input-port? x) (output-port? x))) +(define textual-port? port?) ; all ports are bimodal +(define binary-port? port?) ; all ports are bimodal (define (close-port p) (if (input-port? p) (close-input-port p)) @@ -1089,11 +1117,6 @@ ;with-input-from-file -- requires parameterize ;with-output-to-file -- requires parameterize -;open-binary-input-file -;open-binary-output-file -;open-input-bytevector -;open-output-bytevector -;get-output-bytevector ;--------------------------------------------------------------------------------------------- @@ -1105,6 +1128,9 @@ ; (read-char (p (current-input-port))) ; (peek-char (p (current-input-port))) ; (char-ready? (p (current-input-port))) +; (read-u8 (p (current-input-port))) +; (peek-u8 (p (current-input-port))) +; (u8-ready? (p (current-input-port))) ; (eof-object? x) ; (eof-object) @@ -1147,11 +1173,30 @@ [(k) (read-substring k (current-input-port))] [(k p) (read-substring k p)])) -;read-u8 -;peek-u8 -;u8-ready? -;read-bytevector -;read-bytevector! +(define (read-subbytevector! bvec start end p) + (let loop ([i start]) + (if (fx>=? i end) (fx- i start) + (let ([u8 (read-u8 p)]) + (cond [(eof-object? u8) (if (fx=? i start) u8 (fx- i start))] + [else (bytevector-u8-set! bvec i u8) (loop (fx+ i 1))]))))) + +(define (read-subbytevector k p) + (let ([bvec (make-bytevector k)]) + (let ([r (read-subbytevector! bvec 0 k p)]) + (if (eof-object? r) r + (if (fx=? r k) bvec (subbytevector bvec 0 r)))))) + +(define read-bytevector! + (case-lambda + [(bvec) (read-subbytevector! bvec 0 (bytevector-length bvec) (current-input-port))] + [(bvec p) (read-subbytevector! bvec 0 (bytevector-length bvec) p)] + [(bvec p start) (read-subbytevector! bvec start (bytevector-length bvec) p)] + [(bvec p start end) (read-subbytevector! bvec start end p)])) + +(define read-bytevector + (case-lambda + [(k) (read-subbytevector k (current-input-port))] + [(k p) (read-subbytevector k p)])) (define (%read port simple?) (define-syntax r-error @@ -1496,14 +1541,36 @@ ; integrables: ; -; (write-char c (p (current-output-port))) -; (write-string s (p (current-output-port))) -; (display x (p (current-output-port))) ; (write x (p (current-output-port))) -; (newline (p (current-output-port))) ; (write-shared x (p (current-output-port))) ; (write-simple x (p (current-output-port))) -; (flush-output-port p) +; (display x (p (current-output-port))) +; (newline (p (current-output-port))) +; (write-char c (p (current-output-port))) +; (%write-string1 s p) + +; (write-u8 u8 (p (current-output-port))) +; (%write-bytevector1 b p) + +; (flush-output-port (p (current-output-port))) + +(define (write-substring from start end p) + (do ([i start (fx+ i 1)]) [(fx>=? i end)] (write-char (string-ref from i) p))) + +(define write-string + (case-lambda + [(str) (%write-string1 str (current-output-port))] + [(str p) (%write-string1 str p)] + [(str p start) (write-substring str start (string-length str) p)] + [(str p start end) (write-substring str start end p)])) + +(define (write-subbytevector from start end p) + (do ([i start (fx+ i 1)]) [(fx>=? i end)] (write-u8 (bytevector-u8-ref from i) p))) + +(define write-bytevector + (case-lambda + [(bvec) (%write-bytevector1 bvec (current-output-port))] + [(bvec p) (%write-bytevector1 bvec p)] + [(bvec p start) (write-subbytevector bvec start (bytevector-length bvec) p)] + [(bvec p start end) (write-subbytevector bvec start end p)])) ;--------------------------------------------------------------------------------------------- @@ -1517,7 +1584,7 @@ ;command-line ;exit ;emergency-exit -;get-environment-variable +;(get-environment-variable s) ;get-environment-variables ; (current-second) ; (current-jiffy)