diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 494e57fe1c0474..88180cb9fba844 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -739,6 +739,15 @@ than raw I/O does. Return :class:`bytes` containing the entire contents of the buffer. + .. method:: peek(size=1, /) + + Return bytes from the current position onwards without advancing the position. + At least one byte of data is returned if *size* is nonzero and not at EOF. + Return an empty :class:`bytes` object at EOF. + If the *size* argument is negative or larger than the number of available bytes, + a copy of the buffer from the current position until the end is returned. + + .. versionadded:: 3.15 .. method:: read1(size=-1, /) @@ -772,8 +781,13 @@ than raw I/O does. .. method:: peek(size=0, /) - Return bytes from the stream without advancing the position. The number of - bytes returned may be less or more than requested. If the underlying raw + Return bytes from the current position onwards without advancing the position. + At least one byte of data is returned if not at EOF. + Return an empty :class:`bytes` object at EOF. + At most one read on the underlying raw stream is done to satisfy the call. + The *size* argument is ignored. + The number of bytes returned depends on the buffer size and the current position in the internal buffer. + If the underlying raw stream is non-blocking and the operation would block, returns empty bytes. .. method:: read(size=-1, /) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 927d6035c8c4bc..0bb823b5062a80 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -840,6 +840,14 @@ inspect for :func:`~inspect.getdoc`. (Contributed by Serhiy Storchaka in :gh:`132686`.) + +io +-- + +* Add :meth:`io.BytesIO.peek`. + (Contributed by Marcel Martin in :gh:`90533`.)
+ + json ---- diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 3306c8a274760b..fc6da3ac701fdc 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -996,6 +996,13 @@ def tell(self): raise ValueError("tell on closed file") return self._pos + def peek(self, size=1): + """Return up to size bytes from the current position without advancing it.""" + if self.closed: + raise ValueError("peek on closed file") + end = None if size < 0 else self._pos + size + return bytes(self._buffer[self._pos:end]) + def truncate(self, pos=None): if self.closed: raise ValueError("truncate on closed file") diff --git a/Lib/test/test_io/test_memoryio.py b/Lib/test/test_io/test_memoryio.py index 482b183da23ffa..4393ed074fae24 100644 --- a/Lib/test/test_io/test_memoryio.py +++ b/Lib/test/test_io/test_memoryio.py @@ -566,6 +566,48 @@ def test_issue141311(self): buf = bytearray(2) self.assertEqual(0, memio.readinto(buf)) + def test_peek(self): + buf = self.buftype("1234567890") + with self.ioclass(buf) as memio: + self.assertEqual(memio.tell(), 0) + self.assertEqual(memio.peek(1), buf[:1]) + self.assertEqual(memio.peek(1), buf[:1]) + self.assertEqual(memio.peek(), buf[:1]) + self.assertEqual(memio.peek(3), buf[:3]) + self.assertEqual(memio.peek(5), buf[:5]) + self.assertEqual(memio.peek(0), b"") + self.assertEqual(memio.peek(len(buf) + 100), buf) + self.assertEqual(memio.peek(-1), buf) + self.assertEqual(memio.tell(), 0) + memio.read(1) + self.assertEqual(memio.tell(), 1) + self.assertEqual(memio.peek(1), buf[1:2]) + self.assertEqual(memio.peek(), buf[1:2]) + self.assertEqual(memio.peek(3), buf[1:4]) + self.assertEqual(memio.peek(5), buf[1:6]) + self.assertEqual(memio.peek(0), b"") + self.assertEqual(memio.peek(len(buf) + 100), buf[1:]) + self.assertEqual(memio.peek(-1), buf[1:]) + self.assertEqual(memio.tell(), 1) + memio.read() + self.assertEqual(memio.tell(), len(buf)) + self.assertEqual(memio.peek(1), self.EOF) + self.assertEqual(memio.peek(3), self.EOF) + self.assertEqual(memio.peek(5), self.EOF) + self.assertEqual(memio.peek(0), b"") + self.assertEqual(memio.tell(), 
len(buf)) + # Peeking works after writing + abc = self.buftype("abc") + memio.write(abc) + self.assertEqual(memio.peek(), self.EOF) + memio.seek(len(buf)) + self.assertEqual(memio.peek(), abc[:1]) + self.assertEqual(memio.peek(-1), abc) + self.assertEqual(memio.peek(len(abc) + 100), abc) + self.assertEqual(memio.tell(), len(buf)) + + self.assertRaises(ValueError, memio.peek) + def test_unicode(self): memio = self.ioclass() diff --git a/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst b/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst new file mode 100644 index 00000000000000..c11f101b749c17 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst @@ -0,0 +1 @@ +Add :meth:`io.BytesIO.peek`. diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index d088bb0efac797..d859104d547cf9 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -420,8 +420,9 @@ _io_BytesIO_tell_impl(bytesio *self) return PyLong_FromSsize_t(self->pos); } +// Read without advancing position static PyObject * -read_bytes_lock_held(bytesio *self, Py_ssize_t size) +peek_bytes_lock_held(bytesio *self, Py_ssize_t size) { _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); @@ -432,7 +433,6 @@ read_bytes_lock_held(bytesio *self, Py_ssize_t size) if (size > 1 && self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) && FT_ATOMIC_LOAD_SSIZE_RELAXED(self->exports) == 0) { - self->pos += size; return Py_NewRef(self->buf); } @@ -444,10 +444,19 @@ read_bytes_lock_held(bytesio *self, Py_ssize_t size) } output = PyBytes_AS_STRING(self->buf) + self->pos; - self->pos += size; return PyBytes_FromStringAndSize(output, size); } +static PyObject * +read_bytes_lock_held(bytesio *self, Py_ssize_t size) +{ + PyObject *bytes = peek_bytes_lock_held(self, size); + if (bytes != NULL) { + self->pos += size; + } + return bytes; +} + /*[clinic input] @critical_section _io.BytesIO.read @@ -499,6 +508,39 @@ _io_BytesIO_read1_impl(bytesio *self, 
Py_ssize_t size) return _io_BytesIO_read_impl(self, size); } + +/*[clinic input] +@critical_section +_io.BytesIO.peek + size: Py_ssize_t = 1 + / + +Return bytes from the stream without advancing the position. + +If the size argument is negative, read until EOF is reached. +Return an empty bytes object at EOF. +[clinic start generated code]*/ + +static PyObject * +_io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size) +/*[clinic end generated code: output=fa4d8ce28b35db9b input=1510f0fcf77c0048]*/ +{ + CHECK_CLOSED(self); + + /* adjust invalid sizes */ + Py_ssize_t n = self->string_size - self->pos; + if (size < 0 || size > n) { + size = n; + /* n can be negative after truncate() or seek() */ + if (size < 0) { + size = 0; + } + } + return peek_bytes_lock_held(self, size); +} + + + /*[clinic input] +@critical_section _io.BytesIO.readline @@ -1135,6 +1177,7 @@ static struct PyMethodDef bytesio_methods[] = { _IO_BYTESIO_READLINE_METHODDEF _IO_BYTESIO_READLINES_METHODDEF _IO_BYTESIO_READ_METHODDEF + _IO_BYTESIO_PEEK_METHODDEF _IO_BYTESIO_GETBUFFER_METHODDEF _IO_BYTESIO_GETVALUE_METHODDEF _IO_BYTESIO_SEEK_METHODDEF diff --git a/Modules/_io/clinic/bytesio.c.h b/Modules/_io/clinic/bytesio.c.h index 6595dc937bbcf0..ac7dd8733b6fff 100644 --- a/Modules/_io/clinic/bytesio.c.h +++ b/Modules/_io/clinic/bytesio.c.h @@ -287,6 +287,54 @@ _io_BytesIO_read1(PyObject *self, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(_io_BytesIO_peek__doc__, +"peek($self, size=1, /)\n" +"--\n" +"\n" +"Return bytes from the stream without advancing the position.\n" +"\n" +"If the size argument is negative, read until EOF is reached.\n" +"Return an empty bytes object at EOF."); + +#define _IO_BYTESIO_PEEK_METHODDEF \ + {"peek", _PyCFunction_CAST(_io_BytesIO_peek), METH_FASTCALL, _io_BytesIO_peek__doc__}, + +static PyObject * +_io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size); + +static PyObject * +_io_BytesIO_peek(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject 
*return_value = NULL; + Py_ssize_t size = 1; + + if (!_PyArg_CheckPositional("peek", nargs, 0, 1)) { + goto exit; + } + if (nargs < 1) { + goto skip_optional; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + size = ival; + } +skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _io_BytesIO_peek_impl((bytesio *)self, size); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + PyDoc_STRVAR(_io_BytesIO_readline__doc__, "readline($self, size=-1, /)\n" "--\n" "\n" @@ -637,4 +685,4 @@ _io_BytesIO___init__(PyObject *self, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=daa81dfdae5ccc57 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=396250eb9869a0b4 input=a9049054013a1b77]*/