22804888 action parsing broken when newlines are present
22804939 manifests should support line-continuation for a single attribute
--- a/src/modules/actions/_actions.c Tue Feb 23 22:59:55 2016 -0800
+++ b/src/modules/actions/_actions.c Fri Feb 26 17:32:03 2016 -0800
@@ -20,11 +20,12 @@
*/
/*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
*/
#include <Python.h>
+#include <stdbool.h>
#include <string.h>
static PyObject *MalformedActionError;
@@ -48,7 +49,7 @@
static const char *nohash = "action type doesn't allow payload";
static inline int
-add_to_attrs(PyObject *attrs, PyObject *key, PyObject *attr)
+add_to_attrs(PyObject *attrs, PyObject *key, PyObject *attr, bool concat)
{
int ret;
PyObject *list;
@@ -57,8 +58,41 @@
if (av == NULL)
return (PyDict_SetItem(attrs, key, attr));
- if (PyList_CheckExact(av))
+ if (PyList_CheckExact(av)) {
+ if (concat) {
+ Py_ssize_t len;
+ PyObject *str, *oldstr;
+
+ /*
+ * PyList_GET_ITEM() returns a borrowed reference.
+ * We grab a reference to that string because
+ * PyString_Concat() will steal one, and the list needs
+ * to have one around for when we call into
+ * PyList_SetItem(). PyString_Concat() returns a new
+ * object in str with a new reference, which we must
+ * *not* decref after putting into the list.
+ */
+ len = PyList_GET_SIZE(av);
+ oldstr = str = PyList_GET_ITEM(av, len - 1);
+ Py_INCREF(oldstr);
+ /* decrefing "attr" is handled by caller */
+ PyString_Concat(&str, attr);
+ if (str == NULL)
+ return (-1);
+ return (PyList_SetItem(av, len - 1, str));
+ }
+
return (PyList_Append(av, attr));
+ } else if (concat) {
+ Py_INCREF(av);
+ /* decrefing "attr" is handled by caller */
+ PyString_Concat(&av, attr);
+ if (av == NULL)
+ return (-1);
+ ret = PyDict_SetItem(attrs, key, av);
+ Py_DECREF(av);
+ return (ret);
+ }
if ((list = PyList_New(2)) == NULL)
return (-1);
@@ -95,6 +129,11 @@
}
}
+/*
+ * Note that action parsing does not support line-continuation ('\'); that
+ * support is provided by the Manifest class.
+ */
+
/*ARGSUSED*/
static PyObject *
fromstr(PyObject *self, PyObject *args, PyObject *kwdict)
@@ -108,6 +147,7 @@
int i, ks, vs, keysize;
int smlen, smpos;
int hash_allowed;
+ bool concat = false;
char quote;
PyObject *act_args = NULL;
PyObject *act_class = NULL;
@@ -122,7 +162,7 @@
UQVAL, /* unquoted value */
QVAL, /* quoted value */
WS /* whitespace */
- } state;
+ } state, prevstate;
/*
* If malformed() or invalid() are used, CLEANUP_REFS can only be used
@@ -161,7 +201,7 @@
return (NULL);
}
- s = strpbrk(str, " \t");
+ s = strpbrk(str, " \t\n");
i = strl;
if (s == NULL) {
@@ -179,7 +219,7 @@
typestrl = s - str;
hash_allowed = 0;
if (typestrl == 4) {
- if (strncmp(str, "file", 4) == 0) {
+ if (strncmp(str, "file", 4) == 0) {
act_class = aclass_file;
hash_allowed = 1;
} else if (strncmp(str, "link", 4) == 0)
@@ -202,16 +242,16 @@
if (strncmp(str, "hardlink", 8) == 0)
act_class = aclass_hardlink;
} else if (typestrl == 7) {
- if (strncmp(str, "license", 7) == 0) {
+ if (strncmp(str, "license", 7) == 0) {
act_class = aclass_license;
hash_allowed = 1;
} else if (strncmp(str, "unknown", 7) == 0)
act_class = aclass_unknown;
} else if (typestrl == 9) {
- if (strncmp(str, "signature", 9) == 0) {
+ if (strncmp(str, "signature", 9) == 0) {
act_class = aclass_signature;
hash_allowed = 1;
- }
+ }
} else if (typestrl == 5) {
if (strncmp(str, "group", 5) == 0)
act_class = aclass_group;
@@ -236,7 +276,7 @@
}
ks = vs = typestrl;
- state = WS;
+ prevstate = state = WS;
if ((attrs = PyDict_New()) == NULL) {
PyMem_Free(str);
return (NULL);
@@ -246,7 +286,7 @@
keysize = i - ks;
keystr = &str[ks];
- if (str[i] == ' ' || str[i] == '\t') {
+ if (str[i] == ' ' || str[i] == '\t' || str[i] == '\n') {
if (PyDict_Size(attrs) > 0 || hash != NULL) {
malformed("whitespace in key");
CLEANUP_REFS;
@@ -264,13 +304,13 @@
return (NULL);
}
if (!hash_allowed) {
- invalid(nohash);
- CLEANUP_REFS;
- return (NULL);
+ invalid(nohash);
+ CLEANUP_REFS;
+ return (NULL);
}
hashstr = strndup(keystr, keysize);
+ prevstate = state;
state = WS;
-
}
} else if (str[i] == '=') {
#if PY_MAJOR_VERSION >= 3
@@ -291,8 +331,8 @@
CLEANUP_REFS;
return (NULL);
}
- if (!hash_allowed && keysize == 4 &&
- strncmp(keystr, "hash", keysize) == 0) {
+ if (!hash_allowed && keysize == 4 &&
+ strncmp(keystr, "hash", keysize) == 0) {
invalid(nohash);
CLEANUP_REFS;
return (NULL);
@@ -318,14 +358,17 @@
return (NULL);
}
if (str[i] == '\'' || str[i] == '\"') {
+ prevstate = state;
state = QVAL;
quote = str[i];
vs = i + 1;
- } else if (str[i] == ' ' || str[i] == '\t') {
+ } else if (str[i] == ' ' || str[i] == '\t' ||
+ str[i] == '\n') {
malformed("missing value");
CLEANUP_REFS;
return (NULL);
} else {
+ prevstate = state;
state = UQVAL;
vs = i;
}
@@ -341,7 +384,9 @@
/*
* "slashmap" is a list of the positions of the
* backslashes that need to be removed from the
- * final attribute string.
+ * final attribute string; it is not used for
+ * line continuation which is only supported
+ * by the Manifest class.
*/
if (slashmap == NULL) {
smlen = 16;
@@ -377,6 +422,7 @@
slashmap[smpos] = -1;
}
} else if (str[i] == quote) {
+ prevstate = state;
state = WS;
if (slashmap != NULL) {
char *sattr;
@@ -448,15 +494,18 @@
#else
PyString_InternInPlace(&attr);
#endif
- if (add_to_attrs(attrs, key,
- attr) == -1) {
+
+ if (add_to_attrs(attrs, key, attr,
+ concat) == -1) {
CLEANUP_REFS;
return (NULL);
}
+ concat = false;
}
}
} else if (state == UQVAL) {
- if (str[i] == ' ' || str[i] == '\t') {
+ if (str[i] == ' ' || str[i] == '\t' || str[i] == '\n') {
+ prevstate = state;
state = WS;
Py_XDECREF(attr);
#if PY_MAJOR_VERSION >= 3
@@ -481,22 +530,33 @@
#else
PyString_InternInPlace(&attr);
#endif
- if (add_to_attrs(attrs, key,
- attr) == -1) {
+ if (add_to_attrs(attrs, key, attr,
+ false) == -1) {
CLEANUP_REFS;
return (NULL);
}
}
}
} else if (state == WS) {
- if (str[i] != ' ' && str[i] != '\t') {
+ if (str[i] != ' ' && str[i] != '\t' && str[i] != '\n') {
state = KEY;
ks = i;
if (str[i] == '=') {
malformed("missing key");
CLEANUP_REFS;
return (NULL);
+ } else if (prevstate == QVAL &&
+ (str[i] == '\'' || str[i] == '\"')) {
+ /*
+ * We find ourselves with two adjacent
+ * quoted values, which we concatenate.
+ */
+ state = QVAL;
+ quote = str[i];
+ vs = i + 1;
+ concat = true;
}
+ prevstate = WS;
}
}
}
@@ -527,7 +587,7 @@
#else
PyString_InternInPlace(&attr);
#endif
- if (add_to_attrs(attrs, key, attr) == -1) {
+ if (add_to_attrs(attrs, key, attr, false) == -1) {
CLEANUP_REFS;
return (NULL);
}
@@ -612,14 +672,14 @@
#if PY_MAJOR_VERSION >= 3
if ((m = PyModule_Create(&actionmodule)) == NULL)
- return NULL;
+ return (NULL);
#else
/*
* Note that module initialization functions are void and may not return
* a value. However, they should set an exception if appropriate.
*/
if (Py_InitModule("_actions", methods) == NULL)
- return NULL;
+ return (NULL);
#endif
/*
@@ -631,17 +691,17 @@
*/
if ((sys = PyImport_ImportModule("sys")) == NULL)
- return NULL;
+ return (NULL);
if ((sys_modules = PyObject_GetAttrString(sys, "modules")) == NULL)
- return NULL;
+ return (NULL);
if ((pkg_actions = PyDict_GetItemString(sys_modules, "pkg.actions"))
== NULL) {
/* No exception is set */
PyErr_SetString(PyExc_KeyError, "pkg.actions");
Py_DECREF(sys_modules);
- return NULL;
+ return (NULL);
}
Py_DECREF(sys_modules);
@@ -670,7 +730,7 @@
if ((action_types = PyObject_GetAttrString(pkg_actions,
"types")) == NULL) {
PyErr_SetString(PyExc_KeyError, "pkg.actions.types missing!");
- return NULL;
+ return (NULL);
}
/*
@@ -683,7 +743,7 @@
PyErr_SetString(PyExc_KeyError, \
"Action type class missing: " name); \
Py_DECREF(action_types); \
- return NULL; \
+ return (NULL); \
}
cache_class(aclass_attribute, "set");
@@ -702,14 +762,14 @@
Py_DECREF(action_types);
- return m;
+ return (m);
}
#if PY_MAJOR_VERSION >= 3
PyMODINIT_FUNC
PyInit__actions(void)
{
- return moduleinit();
+ return (moduleinit());
}
#else
PyMODINIT_FUNC
--- a/src/modules/manifest.py Tue Feb 23 22:59:55 2016 -0800
+++ b/src/modules/manifest.py Fri Feb 26 17:32:03 2016 -0800
@@ -21,7 +21,7 @@
#
#
-# Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
#
from collections import namedtuple, defaultdict
@@ -972,6 +972,24 @@
return alldups
def __content_to_actions(self, content):
+ """Parse manifest content, stripping line-continuation
+ characters from the input as it is read; this results in actions
+ with values across multiple lines being passed to the
+ action parsing code whitespace-separated instead.
+
+ For example:
+
+ set name=pkg.summary \
+ value="foo"
+ set name=pkg.description value="foo " \
+ "bar baz"
+
+ ...will each be passed to action parsing as:
+
+ set name=pkg.summary value="foo"
+ set name=pkg.description value="foo " "bar baz"
+ """
+
accumulate = ""
lineno = 0
errors = []
--- a/src/tests/api/t_action.py Tue Feb 23 22:59:55 2016 -0800
+++ b/src/tests/api/t_action.py Fri Feb 26 17:32:03 2016 -0800
@@ -261,10 +261,18 @@
self.assertEqual(a.must_accept, True)
self.assertEqual(a.must_display, False)
+ def __assert_action_str(self, astr, expected, expattrs):
+ """Private helper function for action stringification
+ testing."""
+ act = action.fromstr(astr)
+ self.assertEqualDiff(expected, str(act))
+ self.assertEqualDiff(expattrs, act.attrs)
+
def test_action_tostr(self):
"""Test that actions convert to strings properly. This means
that we can feed the resulting string back into fromstr() and
- get an identical action back."""
+ get an identical action back (excluding a few cases detailed in
+ the test)."""
for s in self.act_strings:
self.debug(str(s))
@@ -276,7 +284,7 @@
self.debug("a2 " + str(a2))
self.assert_(not a.different(a2))
- # The one place that invariant doesn't hold is when you specify
+ # The first case that invariant doesn't hold is when you specify
# the payload hash as the named attribute "hash", in which case
# the resulting re-stringification emits the payload hash as a
# positional attribute again ...
@@ -306,6 +314,63 @@
self.assert_(a.hash == v)
self.assert_(k in str(a))
+ # The attributes are verified separately from the stringified
+ # action in the tests below to ensure that the attributes were
+ # parsed independently and not as a single value (e.g.
+ # 'file path=etc/foo\nfacet.debug=true' is parsed as having a
+ # path attribute and a facet.debug attribute).
+
+ # The next case that invariant doesn't hold is when you have
+ # multiple, quoted values for a single attribute (this case
+ # primarily exists for use with line-continuation support
+ # offered by the Manifest class).
+ expected = 'set name=pkg.description value="foo bar baz"'
+ expattrs = { 'name': 'pkg.description', 'value': 'foo bar baz' }
+ for astr in (
+ "set name=pkg.description value='foo ''bar ''baz'",
+ "set name=pkg.description value='foo ' 'bar ' 'baz'",
+ 'set name=pkg.description value="foo " "bar " "baz"'):
+ self.__assert_action_str(astr, expected, expattrs)
+
+ expected = "set name=pkg.description value='foo \"bar\" baz'"
+ expattrs = { 'name': 'pkg.description',
+ 'value': 'foo "bar" baz' }
+ for astr in (
+ "set name=pkg.description value='foo \"bar\" ''baz'",
+ "set name=pkg.description value='foo \"bar\" '\"baz\""):
+ self.__assert_action_str(astr, expected, expattrs)
+
+ # The next case that invariant doesn't hold is when there are
+ # multiple whitespace characters between attributes or after the
+ # action type.
+ expected = 'set name=pkg.description value=foo'
+ expattrs = { 'name': 'pkg.description', 'value': 'foo' }
+ for astr in (
+ "set name=pkg.description value=foo",
+ "set name=pkg.description value=foo",
+ "set name=pkg.description value=foo",
+ "set\n name=pkg.description \nvalue=foo",
+ "set\t\nname=pkg.description\t\nvalue=foo"):
+ # To force stressing the parsing logic a bit more, we
+ # parse an action with a multi-value attribute that
+ # needs concatention each time before we parse a
+ # single-value attribute that needs concatenation.
+ #
+ # This simulates a refcount bug that was found during
+ # development and serves as an extra stress-test.
+ self.__assert_action_str(
+ 'set name=multi-value value=bar value="foo ""baz"',
+ 'set name=multi-value value=bar value="foo baz"',
+ { 'name': 'multi-value',
+ 'value': ['bar', 'foo baz'] })
+
+ self.__assert_action_str(astr, expected, expattrs)
+
+ astr = 'file path=etc/foo\nfacet.debug=true'
+ expected = 'file NOHASH facet.debug=true path=etc/foo'
+ expattrs = { 'path': 'etc/foo', 'facet.debug': 'true' }
+ self.__assert_action_str(astr, expected, expattrs)
+
def test_action_sig_str(self):
sig_act = action.fromstr(
"signature 54321 algorithm=baz")
@@ -351,6 +416,8 @@
action.fromstr(text)
except action.MalformedActionError as e:
assert e.actionstr == text
+ self.debug(text)
+ self.debug(str(e))
malformed = True
# If the action isn't malformed, something is wrong.
@@ -397,10 +464,17 @@
# Missing value
self.assertMalformed("file 1234 path=/tmp/foo broken=")
self.assertMalformed("file 1234 path=/tmp/foo broken= ")
+ self.assertMalformed("file 1234 path=/tmp/foo broken=\t")
+ self.assertMalformed("file 1234 path=/tmp/foo broken=\n")
self.assertMalformed("file 1234 path=/tmp/foo broken")
# Whitespace in key
self.assertMalformed("file 1234 path=/tmp/foo bro ken")
+ self.assertMalformed("file 1234 path=/tmp/foo\tbro\tken")
+ self.assertMalformed("file 1234 path=/tmp/foo\nbro\nken")
+ self.assertMalformed("file 1234 path ='/tmp/foo")
+ self.assertMalformed("file 1234 path\t=/tmp/foo")
+ self.assertMalformed("file 1234 path\n=/tmp/foo")
# Attribute value is invalid.
self.assertInvalid("depend type=unknown [email protected]")
--- a/src/tests/api/t_manifest.py Tue Feb 23 22:59:55 2016 -0800
+++ b/src/tests/api/t_manifest.py Fri Feb 26 17:32:03 2016 -0800
@@ -21,7 +21,7 @@
# CDDL HEADER END
#
-# Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
import unittest
import tempfile
@@ -62,7 +62,7 @@
set com.sun,test=false
set com.sun,data=true
depend type=require fmri=pkg:/library/libc
-file fff555ff9 mode=0555 owner=sch group=staff \
+file fff555ff9 mode=0555 owner=sch group=staff \\
path=/usr/bin/i386/sort isa=i386
file eeeaaaeee mode=0555 owner=sch group=staff path=/usr/bin/amd64/sort isa=amd64
@@ -72,21 +72,27 @@
"""
self.m2_signatures = {
- "sha-1": "e600f5e48a838b11ed73fd4afedfc35638ab0bbf"
+ "sha-1": "7272cb2461a8a4ccf958b7a7f13f3ae20cbb0212"
}
#
- # Try to keep this up to date with on of
+ # Try to keep this up to date with one of
# every action type.
#
self.diverse_contents = """\
set com.sun,test=false
+set name=pkg.description value="The Z Shell (zsh) is a Bourne-like shell " \\
+ "designed for interactive use, although it is also a powerful scripting " \\
+ "language. Many of the useful features of bash, ksh, and tcsh were " \\
+ "incorporated into zsh, but many original features were added."
depend type=require fmri=pkg:/library/libc
file fff555ff9 mode=0555 owner=sch group=staff path=/usr/bin/i386/sort isa=i386
dir owner=root path=usr/bin group=bin mode=0755 variant.arch=i386 variant.arch=sparc
dir owner=root path="opt/dir with spaces in value" group=bin mode=0755
-dir owner=root path="opt/dir with whitespaces in value" group=bin mode=0755
-link path=usr/lib/amd64/libjpeg.so \
+dir owner=root path="opt/dir with " \\
+ "whitespaces " \\
+ "in value" group=bin mode=0755
+link path=usr/lib/amd64/libjpeg.so \\
target=libjpeg.so.62.0.0
hardlink path=usr/bin/amd64/rksh93 target=ksh93 variant.opensolaris.zone=global
group groupname=testgroup gid=10
@@ -96,7 +102,7 @@
set com.sun,test=false
set com.sun,data=true
depend type=require fmri=pkg:/library/libc
-file fff555ff9 mode=0555 owner=sch group=staff path=/usr/bin/i386/sort \
+file fff555ff9 mode=0555 owner=sch group=staff path=/usr/bin/i386/sort \\
isa=i386
"""
@@ -119,10 +125,11 @@
# Index raises an exception if the substring isn't found;
# if that were to happen, the test case would then fail.
- str(self.m1).index("fmri=pkg:/library/libc")
- str(self.m1).index("owner=sch")
- str(self.m1).index("group=staff")
- str(self.m1).index("isa=i386")
+ mstr = str(self.m1)
+ mstr.index("fmri=pkg:/library/libc")
+ mstr.index("owner=sch")
+ mstr.index("group=staff")
+ mstr.index("isa=i386")
# Verify set_content with a byte string with unicode data
# works.
@@ -141,6 +148,26 @@
self.assertEqual(bstr, output)
self.assert_(isinstance(output, str))
+ # Verify Manifests using line continuation '\' are parsed as
+ # expected.
+ m = manifest.Manifest()
+ m.set_content(self.diverse_contents)
+ expected = sorted('''\
+set name=com.sun,test value=false
+set name=pkg.description value="The Z Shell (zsh) is a Bourne-like shell designed for interactive use, although it is also a powerful scripting language. Many of the useful features of bash, ksh, and tcsh were incorporated into zsh, but many original features were added."
+depend fmri=pkg:/library/libc type=require
+group gid=10 groupname=testgroup
+dir group=bin mode=0755 owner=root path="opt/dir with spaces in value"
+dir group=bin mode=0755 owner=root path="opt/dir with whitespaces in value"
+dir group=bin mode=0755 owner=root path=usr/bin variant.arch=i386 variant.arch=sparc
+file fff555ff9 group=staff isa=i386 mode=0555 owner=sch path=usr/bin/i386/sort
+hardlink path=usr/bin/amd64/rksh93 target=ksh93 variant.opensolaris.zone=global
+link path=usr/lib/amd64/libjpeg.so target=libjpeg.so.62.0.0
+'''.splitlines())
+ actual = sorted(l.strip() for l in m.as_lines())
+
+ self.assertEqualDiff(expected, actual)
+
def test_diffs1(self):
""" humanized_differences runs to completion """
@@ -281,14 +308,14 @@
#
# Expect to see something -> None differences
#
- self.assertEqual(len(diffs), 9)
+ self.assertEqual(len(diffs), 10)
for d in diffs:
self.assertEqual(type(d[1]), type(None))
#
# Expect to see None -> something differences
#
- self.assertEqual(len(diffs2), 9)
+ self.assertEqual(len(diffs2), 10)
for d in diffs2:
self.assertEqual(type(d[0]), type(None))
self.assertNotEqual(type(d[1]), type(None))
@@ -388,7 +415,9 @@
set name=pkg.fmri value=pkg:/[email protected]
dir owner=root path=usr/bin group=bin mode=0755 variant.arch=i386
dir owner=root path="opt/dir with spaces in value" group=bin mode=0755
-dir owner=root path="opt/dir with whitespaces in value" group=bin mode=0755 variant.debug.osnet=true
+dir owner=root path="opt/dir with " \\
+ "whitespaces " \\
+ "in value" group=bin mode=0755 variant.debug.osnet=true
"""
def setUp(self):
--- a/src/tests/api/t_pkg_api_install.py Tue Feb 23 22:59:55 2016 -0800
+++ b/src/tests/api/t_pkg_api_install.py Fri Feb 26 17:32:03 2016 -0800
@@ -21,7 +21,7 @@
#
#
-# Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
#
import testutils
@@ -963,7 +963,7 @@
[pfmri.pkg_name])
for bad_act in (
- 'set name=description value="" \" my desc \" ""',
+ 'set name=description value="" \" my desc ""',
"set name=com.sun.service.escalations value="):
self.debug("Testing with bad action "
"'{0}'.".format(bad_act))
--- a/src/tests/cli/t_pkg_install.py Tue Feb 23 22:59:55 2016 -0800
+++ b/src/tests/cli/t_pkg_install.py Fri Feb 26 17:32:03 2016 -0800
@@ -961,7 +961,7 @@
# Now attempt to corrupt the client's copy of the
# manifest such that actions are malformed.
for bad_act in (
- 'set name=description value="" \" my desc \" ""',
+ 'set name=description value="" \" my desc ""',
"set name=com.sun.service.escalations value="):
self.debug("Testing with bad action "
"'{0}'.".format(bad_act))