diff --git a/tools/update_compids.sh b/tools/update_compids.sh
index 332fb16..38d1fff 100755
--- a/tools/update_compids.sh
+++ b/tools/update_compids.sh
path=en-us/specification/assigned-numbers/company-identifiers
# Use "iconv -c" to strip unwanted unicode characters
-# Also strip <input> tags of type checkbox because html2text generates UTF-8
-# for them in some distros even when using -ascii (e.g. Fedora 18)
+# Fixups:
+# - strip <input> tags of type "checkbox" because html2text generates UTF-8 for
+# them in some distros even when using -ascii (e.g. Fedora)
+# - replace "&nbsp;" (non-breaking space) with whitespace manually, because
+# some versions incorrectly convert it into "\xC2\xA0"
curl https://www.bluetooth.org/$path | iconv -c -f utf8 -t ascii | \
- sed '/<input.*type="checkbox"/d' | \
+ sed '/<input.*type="checkbox"/d; s/&nbsp;/ /g' | \
html2text -ascii -o identifiers.txt >/dev/null
-# Some versions of html2text do not replace &amp; (e.g. Fedora 18)
+# Some versions of html2text do not replace &amp; (e.g. Fedora)
sed -i 's/&amp;/\&/g' identifiers.txt
sed -n '/^const char \*bt_compidtostr(int compid)/,/^}/p' \
echo "ERROR: could not parse company IDs from bluetooth.org" >&2
exit 1
fi
+# Abort if new.c contains anything other than printable ASCII, tab or
+# newline. Force the C locale for tr: [:print:] is locale-dependent, and in a
+# single-byte locale (e.g. ISO-8859-1) high bytes would count as printable
+# and slip through this check.
+if [ -n "$(LC_ALL=C tr -d '[:print:]\t\n' < new.c)" ]; then
+ echo -n "ERROR: invalid non-ASCII characters found while parsing" >&2
+ echo -n " company IDs. Please identify offending sequence and fix" >&2
+ echo " tools/update_compids.sh accordingly." >&2
+ exit 1
+fi
echo -e '\tcase 65535:\n\t\treturn "internal use";' >> new.c
echo -e '\tdefault:\n\t\treturn "not assigned";\n\t}\n}' >> new.c