The purpose of this module is to prepare the data used by Module:Convert to allow conversion between units of measurement.

Usage: Put one of the following lines (with nothing else) in a sandbox:

  • {{#invoke:convert/makeunits|makeunits}}
  • {{subst:#invoke:convert/makeunits|makeunits}}

Previewing the sandbox should display the wikitext that needs to be copied and pasted into Module:Convert/data. If a problem occurs, warning messages will be displayed to indicate that the unit definitions need to be fixed.

By default, the module reads the unit definitions from Module:Convert/documentation/conversion data/doc. For testing purposes, it is possible to specify that the definitions are read from another page, for example, User:Johnuniq/sandbox2, by specifying the wanted title:

  • {{#invoke:convert/makeunits|makeunits|User:Johnuniq/sandbox2}}
  • {{subst:#invoke:convert/makeunits|makeunits|User:Johnuniq/sandbox2}}

The module contains table specials which is used to insert a small amount of "built-in" data that is not currently defined in the input wikitext.

The module reads data from Module:Convert/text to allow localization of the table of units for use on another wiki.


-- This module generates the wiktext required at [[Module:Convert/data]]
-- by reading and processing the wikitext of the unit definitions at
-- [[Module:Convert/documentation/conversion data/doc]].
--
-- Usage: Put the following line (with nothing else) in a sandbox:
-- {{#invoke:convert/makeunits|makeunits}}
-- Previewing the sandbox should display the required wikitext.
--
-- It is also possible to use the following where PAGE is replaced
-- with the title of the page holding the unit definitions.
-- {{#invoke:convert/makeunits|makeunits|PAGE}}
--
-- LATER:
-- If support additional currencies, need to update table:
--     local currency = { ['$'] = true, ['£'] = true }
--
-- Script method:
-- * Read lines, ignoring everything before "== Conversions ==".
-- * Process the following lines:
--   * Find next level-3 heading like "=== Length ===".
--   * Parse each following line starting with "|"
--     (but ignore lines starting with "|-" or "|}".
--   * Split such lines into fields (delimiter "||") and trim
--     leading/trailing whitespace from each field.
--     Remove any "colspan" at front of second field (symbol).
--   * Remove thousand separators (commas) from the scale field.
--     If the scale is a number, do not change it.
--     Otherwise, it should be an expression like "5/9", in
--     which case it is replaced by the value of the expression.
--   * Remove wiki formatting '[[...]]' from the link field.
--   * Remove redundant fields from the unit to reduce size of data table.
--   * Create alternative forms of a unit such as an alias or a combination.
-- * Stop processing when encounter end of text or a line starting
--   with a level-2 heading ("==" but not "===").
-- * Repeat above for each heading listed at prepare_data().
-- * Output Lua source for the units table.
--
-- -- Output has the following form.
-- local all_units = {
--     ["unitcode"] = {                        -- standard format
--         name1    = "singular name",         -- omitted if redundant
--         name1_us = "singular name sp=us",   -- omitted if redundant
--         name2    = "plural name",           -- omitted if redundant
--         name2_us = "plural name sp=us",     -- omitted if redundant
--         symbol   = "symbol",
--         sym_us   = "symbol sp=us",          -- omitted if redundant
--         usename  = 1,                       -- omitted if empty
--         utype    = "unit type",             -- from level-3 heading
--         scale    = 1,                       -- a value, if necessary from evaluating an expression
--         subdivs  = { ["ft"] = { 5280, default = "km" }, ["yd"] = { 1760 } }  -- composite input; omitted if empty
--         link     = "title of article for wikilink",  -- omitted if empty or redundant
--         ...                                 -- other values
--     },
--     ["unitcode"] = {        -- alternative format to generate an alias
--         target   = "unit code",
--         ...                                 -- optional values to override those of target
--     },
--     ["unitcode"] = {        -- alternative format to generate a "per" unit like $/acre or BTU/h
--         per      = {u1, u2},                -- numbered table of unitcodes (u1 may be a currency symbol)
--         ...                                 -- optional values
--     },
--     ["unitcode"] = {        -- alternative format to generate an error message
--         shouldbe = "message that some other unit code should be used",
--     },
--     ["unitcode"] = {        -- alternative format for combination outputs (like 'm ft')
--         combination = {u1, u2, ...},        -- numbered table of unitcodes
--         utype    = "unit type",             -- as for standard format
--     },
--     ["unitcode"] = {        -- alternative format for output multiples (like 'ftin')
--         combination = {u1, u2, ...},        -- numbered table of unitcodes
--         multiple = {f1, f2, ...},           -- numbered table of integer factors
--         utype    = "unit type",             -- as for standard format
--     },
--     ...
-- }

local specials = {
	-- This table is used to add extra fields when defining some units which
	-- require exceptions to normal processing.
	-- Each key is in the local language, while each value is fixed text,
	-- however, this script is NOT edited to localize the keys. Instead,
	-- [[Module:Convert/text]] is edited, and this script replaces sections
	-- of the following with localized definitions, if given.
	-- LATER: It would be better if this was defined in the conversions table.
	utype = {
		-- ["unit type in local language"] = "name_used_in_this_script"
		["fuel efficiency"] = "type_fuel_efficiency",
		["length"] = "type_length",
		["temperature"] = "type_temperature",
		["volume"] = "type_volume",
	},
	ucode = {
		exception = {
			-- ["unit code in local language"] = "name_used_in_module_convert"
			["ft"] = "integer_more_precision",
			["in"] = "subunit_more_precision",
			["lb"] = "integer_more_precision",
		},
		istemperature = {
			-- Common temperature scales (not keVT or MK).
			-- ["unit code in local language"] = 1
			["C"] = true,
			["F"] = true,
			["K"] = true,
			["R"] = true,
		},
		usesymbol = {
			-- Use unit symbol not name if abbr not specified.
			-- ["unit code in local language"] = 1
			["C"] = 1,
			["F"] = 1,
			["K"] = 1,
			["R"] = 1,
			["C-change"] = 1,
			["F-change"] = 1,
			["K-change"] = 1,
		},
		alttype = {
			-- Unit has an alternate type that is a valid conversion.
			-- ["unit code in local language"] = "alternate type in local language"
			["Nm"] = "energy",
			["ftlb"] = "torque",
			["ftlb-f"] = "torque",
			["ftlbf"] = "torque",
			["inlb"] = "torque",
			["inlb-f"] = "torque",
			["inlbf"] = "torque",
			["inoz-f"] = "torque",
			["inozf"] = "torque",
		},
	},
}

-- When a number is formatted as a string, the result can indicate
-- that the number is not valid. For example, on some Lua installs,
-- tostring(1/0) includes "#INF", and tostring(0/0) includes "#IND".
-- On Scribunto, the results are "inf" and "nan" (each includes "n").
-- A testing program can set the global variable is_test_run.
local bad_number_char = is_test_run and '#' or 'n'

local function collection()
	-- Return a table to hold items.
	return {
		n = 0,
		add = function (self, item)
			self.n = self.n + 1
			self[self.n] = item
		end,
		pop = function (self, item)
			if self.n > 0 then
				local top = self[self.n]
				self.n = self.n - 1
				return top
			end
		end,
		join = function (self, sep)
			return table.concat(self, sep or '\n')
		end,
	}
end

local warnings = collection()
local function add_warning(text)
	-- Add a warning that will be inserted above the final result.
	warnings:add(text)
end

---Begin code to evaluate expressions-----------------------------------
-- This is needed because Lua's loadstring() is not available in Scribunto,
-- and each scale value can be specifed as an expression such as "5/9".
-- More complex expressions are supported, including use of parentheses
-- and the binary operators: + - * / ^

local operators = {
	['+'] = { precedence = 1, associativity = 1, func = function (a, b) return a + b end },
	['-'] = { precedence = 1, associativity = 1, func = function (a, b) return a - b end },
	['*'] = { precedence = 2, associativity = 1, func = function (a, b) return a * b end },
	['/'] = { precedence = 2, associativity = 1, func = function (a, b) return a / b end },
	['^'] = { precedence = 3, associativity = 2, func = function (a, b) return a ^ b end },
	['('] = '(',
	[')'] = ')',
}

local function tokenizer(text)
	-- Function 'next' returns the next token which is one of:
	--     number
	--     table (operator)
	--     string ('(' or ')')
	--     nil (end of text)
	-- If invalid, an error is thrown.
	-- The number is unsigned (unary operators are not supported).
	return {
		pos = 1,
		maxpos = #text,
		text = text,
		next = function(self)
			if self.pos <= self.maxpos then
				local p1, p2, hit = self.text:find('^%s*([+%-*/^()])', self.pos)
				if hit then
					self.pos = p2 + 1
					return operators[hit]
				end
				p1, p2, hit = self.text:find('^%s*(%d*%.?%d*[eE][+-]?%d*)', self.pos)
				if not hit then
					p1, p2, hit = self.text:find('^%s*(%d*%.?%d*)', self.pos)
				end
				local value = tonumber(hit)
				if value then
					self.pos = p2 + 1
					return value
				end
				error('invalid number "' .. self.text:sub(self.pos) .. '"', 0)
			end
		end
	}
end

local function evaluate_tokens(tokens, inparens)
	-- Return the value from evaluating tokenized expression, or throw an error.
	local numstack, opstack = collection(), collection()
	local function perform_ops(precedence, associativity)
		while opstack.n > 0 and (opstack[opstack.n].precedence > precedence or
			(opstack[opstack.n].precedence == precedence and associativity == 1)) do
			local rhs = numstack:pop()
			local lhs = numstack:pop()
			if not (rhs and lhs) then error('missing number', 0) end
			local op = opstack:pop()
			numstack:add(op.func(lhs, rhs))
		end
	end
	local token_last
	local function set_state(token_type)
		if token_last == token_type then
			local missing = (token_type == 'number') and 'operator' or 'number'
			error('missing ' .. missing, 0)
		end
		token_last = token_type
	end
	while true do
		local token = tokens:next()
		if type(token) == 'number' then
			set_state('number')
			numstack:add(token)
		elseif type(token) == 'table' then
			set_state('operator')
			perform_ops(token.precedence, token.associativity)
			opstack:add(token)
		elseif token == '(' then
			set_state('number')
			numstack:add(evaluate_tokens(tokens, true))
		elseif token == ')' then
			if inparens then
				break
			end
			error('unbalanced parentheses', 0)
		else
			break
		end
	end
	perform_ops(0)
	if numstack.n > 1 then error('missing operator', 0) end
	if numstack.n < 1 then error('missing number', 0) end
	return numstack:pop()
end

local function evaluate(expression)
	-- Return value (a number) from evaluating expression (a string),
	-- or throw an error if invalid.
	-- This is not bullet proof, but it should support the expressions used.
	return evaluate_tokens(tokenizer(expression))
end
---End code to evaluate expressions-------------------------------------
---Begin code adapted from Module:Convert-------------------------------

local SIprefixes, eng_scales
local plural_suffix = 's'

local function shallow_copy(t)
	-- Return a shallow copy of t.
	-- Do not need the features and overhead of mw.clone() provided by Scribunto.
	local result = {}
	for k, v in pairs(t) do
		result[k] = v
	end
	return result
end

local unit_mt = {
	-- Metatable to get missing values for a unit that does not accept SI prefixes,
	-- or a for a unit that accepts prefixes but where no prefix was used.
	-- In the latter case, and before use, fields symbol, name1, name1_us
	-- must be set from _symbol, _name1, _name1_us respectively.
	__index = function (self, key)
		local value
		if key == 'name1' or key == 'sym_us' then
			value = self.symbol
		elseif key == 'name2' then
			value = self.name1 .. plural_suffix
		elseif key == 'name1_us' then
			value = self.name1
			if not rawget(self, 'name2_us') then
				-- If name1_us is 'foot', do not make name2_us by appending plural_suffix.
				self.name2_us = self.name2
			end
		elseif key == 'name2_us' then
			local raw1_us = rawget(self, 'name1_us')
			if raw1_us then
				value = raw1_us .. plural_suffix
			else
				value = self.name2
			end
		elseif key == 'link' then
			value = self.name1
		else
			return nil
		end
		rawset(self, key, value)
		return value
	end
}

local unit_prefixed_mt = {
	-- Metatable to get missing values for a unit that accepts SI prefixes,
	-- and where a prefix has been used.
	-- Before use, fields si_name, si_prefix must be defined.
	__index = function (self, key)
		local value
		if key == 'symbol' then
			value = self.si_prefix .. self._symbol
		elseif key == 'sym_us' then
			value = self.symbol  -- always the same as sym_us for prefixed units
		elseif key == 'name1' then
			local pos = rawget(self, 'prefix_position') or 1
			value = self._name1
			value = value:sub(1, pos - 1) .. self.si_name .. value:sub(pos)
		elseif key == 'name2' then
			value = self.name1 .. plural_suffix
		elseif key == 'name1_us' then
			value = rawget(self, '_name1_us')
			if value then
				local pos = rawget(self, 'prefix_position') or 1
				value = value:sub(1, pos - 1) .. self.si_name .. value:sub(pos)
			else
				value = self.name1
			end
		elseif key == 'name2_us' then
			if rawget(self, '_name1_us') then
				value = self.name1_us .. plural_suffix
			else
				value = self.name2
			end
		elseif key == 'link' then
			value = self.name1
		else
			return nil
		end
		rawset(self, key, value)
		return value
	end
}

local function lookup(units, unitcode, sp, what)
	-- Return a copy of the unit if found, or return nil.
	-- In this cut-down code, sp is always nil, and what is ignored.
	local t = units[unitcode]
	if t then
		if t.shouldbe then
			return nil
		end
		local result = shallow_copy(t)
		if result.prefixes then
			result.symbol = result._symbol
			result.name1 = result._name1
			result.name1_us = result._name1_us
		end
		return setmetatable(result, unit_mt)
	end
	for plen = 2, 1, -1 do
		-- Look for an SI prefix; should never occur with an alias.
		-- Check for longer prefix first ('dam' is decametre).
		-- Micro (µ) is two bytes in utf-8, so is found with plen = 2.
		local prefix = string.sub(unitcode, 1, plen)
		local si = SIprefixes[prefix]
		if si then
			local t = units[unitcode:sub(plen+1)]
			if t and t.prefixes then
				local result = shallow_copy(t)
				if (sp == 'us' or t.sp_us) and si.name_us then
					result.si_name = si.name_us
				else
					result.si_name = si.name
				end
				result.si_prefix = si.prefix or prefix
				-- In this script, each scale is a string.
				result.scale = tostring(tonumber(t.scale) * 10 ^ (si.exponent * t.prefixes))
				result.prefixes = nil  -- a prefixed unit does not take more prefixes (in this script, the returned unit may be added to the list of units)
				return setmetatable(result, unit_prefixed_mt)
			end
		end
	end
	local exponent, baseunit = unitcode:match('^e(%d+)(.*)')
	if exponent then
		local engscale = eng_scales[exponent]
		if engscale then
			local result = lookup(units, baseunit, sp, 'no_combination')
			if not result then return nil end
			if result.engscale == nil then
				result.defkey = unitcode  -- key to lookup default exception
				engscale.exponent = exponent
				result.engscale = engscale
				-- Do not set result.scale as this code is called for units where that is not set.
				return result
			end
		end
	end
	return nil
end

local function evaluate_condition(value, condition)
	-- Return true or false from applying a conditional expression to value,
	-- or throw an error if invalid.
	-- A very limited set of expressions is supported:
	--    v < 9
	--    v * 9 < 9
	-- where
	--    'v' is replaced with value
	--    9 is any number (as defined by Lua tonumber)
	--    '<' can also be '<=' or '>' or '>='
	-- In addition, the following form is supported:
	--    LHS and RHS
	-- where
	--    LHS, RHS = any of above expressions.
	local function compare(value, text)
		local arithop, factor, compop, limit = text:match('^%s*v%s*([*]?)(.-)([<>]=?)(.*)$')
		if arithop == nil then
			error('Invalid default expression.', 0)
		elseif arithop == '*' then
			factor = tonumber(factor)
			if factor == nil then
				error('Invalid default expression.', 0)
			end
			value = value * factor
		end
		limit = tonumber(limit)
		if limit == nil then
			error('Invalid default expression.', 0)
		end
		if compop == '<' then
			return value < limit
		elseif compop == '<=' then
			return value <= limit
		elseif compop == '>' then
			return value > limit
		elseif compop == '>=' then
			return value >= limit
		end
		error('Invalid default expression.', 0)  -- should not occur
	end
	local lhs, rhs = condition:match('^(.-%W)and(%W.*)')
	if lhs == nil then
		return compare(value, condition)
	end
	return compare(value, lhs) and compare(value, rhs)
end

---End code adapted from Module:Convert---------------------------------

local function strip(text)
	-- Return text with no leading/trailing whitespace.
	return text:match("^%s*(.-)%s*$")
end

local function empty(text)
	-- Return true if text is nil or empty (assuming a string).
	return text == nil or text == ''
end

-- Tables of units: k = unit code, v = unit table.
local units_index = {}  -- all units: normal, alias, per, combination, or multiple
local alias_index = {}  -- all aliases (to detect attempts to define more than once)
local per_index = {}    -- all "per" units (to detect attempts to define more than once)

local function get_unit(ucode)
	-- Look up unit code in our cache of units.
	if empty(ucode) then
		return nil
	end
	return lookup(units_index, ucode)
end

local overrides = {}  -- read from input for unit codes that should not be checked for a duplicate

local function insert_unique_unit(data, unit, index)
	-- After inserting any required built-in data, insert the unit into the
	-- data table and (if index not nil) add to index,
	-- but not if the unit code is already defined.
	local ucode = unit.unitcode
	local known = get_unit(ucode)
	if known and not overrides[ucode] then
		error('Unit code "' .. ucode .. '" has already been defined.', 0)
	end
	for item, t in pairs(specials.ucode) do
		unit[item] = t[ucode]
	end
	if index then
		index[ucode] = unit
	end
	table.insert(data, unit)
end

local function check_condition(condition)
	-- Return true if condition appears to be valid; otherwise return false.
	for _, value in ipairs({ 0, 0.1, 1, 1.1, 10, 100, 1000, 1e4, 1e5 }) do
		local success, result = pcall(evaluate_condition, value, condition)
		if not success then
			return false
		end
	end
	return true
end

local function check_default_expression(default, ucode)
	-- Return a numbered table of names present in param default
	-- (two names if an expression, or one name (param default) otherwise).
	-- Throw an error if a problem occurs.
	-- An expression uses pipe-delimited fields with 'v' representing
	-- the input value for the conversion.
	-- Example (suffix is optional): 'v < 120 ! small ! big ! suffix'
	-- returns { 'smallsuffix', 'bigsuffix' }.
	if default:find('!', 1, true) == nil then
		return { default }
	end
	local t = {}
	for item in (default .. '!'):gmatch('%s*(.-)%s*!') do
		t[#t+1] = item  -- split on '!', removing leading/trailing whitespace
	end
	if not (#t == 3 or #t == 4) then
		error('Default output "' .. default .. '" for unit "' .. ucode .. '" should have 2 or 3 "!".', 0)
	end
	local condition, default1, default2 = t[1], t[2], t[3]
	if #t == 4 then
		default1 = default1 .. t[4]
		default2 = default2 .. t[4]
	end
	if not check_condition(condition) then
		error('Invalid condition in default "' .. default .. '" for unit "' .. ucode .. '".', 0)
	end
	return { default1, default2 }
end

local function check_default(default, ucode, utype, unit_table)
	-- Check the given name (or expression) of a default output.
	-- Throw an error if a problem occurs.
	local done = {}
	for _, default in ipairs(check_default_expression(default, ucode)) do
		if done[default] then
			error('Default output "' .. default .. '" for unit "' .. ucode .. '" is repeated.', 0)
		end
		if default == ucode then
			error('Default output for unit "' .. ucode .. '" is the same unit.', 0)
		end
		local default_table = get_unit(default)
		if default_table == nil then
			error('Default output "' .. default .. '" for unit "' .. ucode .. '" is not defined.', 0)
		end
		if not (utype == unit_table.utype and utype == default_table.utype) then
			error('Default output "' .. default .. '" for unit "' .. ucode .. '" has wrong type.', 0)
		end
		done[default] = true
	end
end

local function check_all_defaults(units, maxerrors)
	-- Check each default in units and warn if needed.
	-- This is done after all input data has been processed.
	-- Throw an error if a problem occurs.
	local errors = collection()
	local missing = collection()  -- unitcodes with missing defaults
	for _, unit in ipairs(units) do
		if unit.shouldbe == nil and unit.combination == nil then
			-- This is a standard unit or an alias/per (not shouldbe, combo).
			-- An alias may have a default defined, but it is optional.
			local default = unit.default
			local ucode = unit.unitcode
			if empty(default) then
				if unit.target == nil then  -- unit should have a default
					missing:add(ucode)
				end
			else
				local ok, msg = pcall(check_default, default, ucode, unit.utype, unit)
				if not ok then
					errors:add(msg)
					if errors.n >= maxerrors then
						break
					end
				end
			end
		end
	end
	if errors.n > 0 then
		error(errors:join(), 0)
	end
	if missing.n > 0 then
		add_warning('Units with the following unit codes have no default output.')
		local limit = maxerrors
		for _, v in ipairs(missing) do
			limit = limit - 1
			if limit < 0 then
				add_warning('  (and more not shown)')
				break
			end
			add_warning('  ' .. v)
		end
	end
end

local function check_all_pers(units, maxerrors)
	-- Check each component of each "per" unit and warn if needed.
	-- In addition, add any required extra fields for some types of units.
	-- This is done after all input data has been processed.
	-- Throw an error if a problem occurs.
	local currency = { ['$'] = true, ['£'] = true }
	local errors = collection()
	local function errmsg(text)
		errors:add(text)
	end
	for _, unit in ipairs(units) do
		local per = unit.per
		if per then
			local ucode = unit.unitcode
			if #per ~= 2 then
				errmsg('Unit "' .. ucode .. '" does not have exactly 2 fields in the "per".')
			else
				local types = {}
				for i, v in ipairs(per) do
					if empty(v) then
						errmsg('Unit "' .. ucode .. '" has an empty field in the "per".')
					end
					if not currency[v] then
						local t = get_unit(v)
						if t then
							types[i] = t.utype
						else
							errmsg('Unit "' .. ucode .. '" has undefined unit code "' .. v .. '" in the "per".')
						end
					end
				end
				if specials.utype[unit.utype] == 'type_fuel_efficiency' then
					local expected = { type_volume = 1, type_length = 2 }
					local top_type = expected[specials.utype[types[1]]]
					local bot_type = expected[specials.utype[types[2]]]
					if top_type and bot_type and top_type ~= bot_type then
						unit.iscomplex = true
						if top_type == 1 then
							unit.invert = 1
						else
							unit.invert = -1
						end
					else
						errmsg('Unit "' .. ucode .. '" has invalid unit types for fuel efficiency.')
					end
				end
			end
		end
		if errors.n >= maxerrors then
			break
		end
	end
	if errors.n > 0 then
		error(errors:join(), 0)
	end
end

local function update_units(units, composites)
	-- Update some unit definitions with extra data defined in other sections.
	-- This is done after all input data has been processed.
	for _, unit in ipairs(units) do
		local comp = composites[unit.unitcode]
		if comp then
			unit.subdivs = '{ ' .. table.concat(comp.subdivs, ', ') .. ' }'
		end
	end
end

local function make_override(data)
	-- Return a function which, when called, stores a unit code is not to be
	-- checked for a duplicate. The table is stored in data (also a table).
	return function (utype, fields)
		local ucode = fields[1]
		if ucode == nil then
			error('Missing unit code for an override.', 0)
		end
		if data[ucode] then
			error('Override "' .. ucode .. '" is already defined.', 0)
		end
		data[ucode] = true
	end
end

local function make_default(data)
	-- Return a function which, when called, stores a table that defines a
	-- default output unit. The table is stored in data (also a table).
	local defaults_index = {}  -- to detect attempts to define a default twice
	return function (utype, fields)
		-- Store a table defining a unit.
		-- This is for a unit such as 'kg' that has a default output unit
		-- different from what is defined for the base unit ('g').
		-- Throw an error if a problem occurs.
		local ucode = fields[1]
		local default = fields[2]
		if empty(ucode) then
			error('Defaults section: no unit code specified.', 0)
		end
		if empty(default) then
			error('Defaults section: unit "' .. ucode .. '" has no default specified.', 0)
		end
		if #fields ~= 2 then
			error('Defaults section: unit "' .. ucode .. '" should have two fields only.', 0)
		end
		local unit_table = get_unit(ucode)
		if unit_table == nil then
			error('Defaults section: unit "' .. ucode .. '" is not defined.', 0)
		end
		local symbol = unit_table.defkey or unit_table.symbol
		if empty(symbol) then
			error('Defaults section: unit "' .. ucode .. '" must have a symbol.', 0)
		end
		check_default(default, ucode, utype, unit_table)
		if defaults_index[ucode] then
			error('Defaults section: unit "' .. ucode .. '" has already been specified.', 0)
		end
		defaults_index[ucode] = default
		table.insert(data, { symbol = symbol, default = default })
	end
end

local function clean_link(link, name)
	-- Return link, customary where:
	--   link = given link after removing any '[[...]]' wiki formatting
	--          and removing any leading '+' or '*' or '@';
	--   customary = 1 if leading '+', or 2 if '*' or 3 if '@', or nil
	--   (for extra "US" or "U.S." or "Imperial" customary units link).
	-- Result has leading/trailing whitespace removed, and is nil if empty
	-- or if link matches the name, if a name is specified.
	-- Exception: If the link is empty and the name starts with '[[',
	-- the link is stored as '' (for a unit name which is always linked).
	-- If the resulting link is nil, no link field is stored, and
	-- if a link is required, it will be set from the unit's name.
	if empty(link) then
		return (name:sub(1, 2) == '[[') and '' or nil
	end
	local prefixes = { ['+'] = 1, ['*'] = 2, ['@'] = 3 }
	local customary = prefixes[link:sub(1, 1)]
	if customary then
		link = strip(link:sub(2))
	end
	if link:sub(1, 2) == '[[' then
		link = link:sub(3)
	end
	if link:sub(-2) == ']]' then
		link = link:sub(1, -3)
	end
	link = strip(link)
	if link:sub(1, 1) == '[' or link:sub(-1) == ']' then
		error('Link "' .. link .. '" has wrong number of brackets.', 0)
	end
	if link == '' then
		link = nil
	elseif name then
		local l = link:sub(1, 1):lower() .. link:sub(2)
		local n = name:sub(1, 1):lower() .. name:sub(2)
		if l == n then
			link = nil  -- link == name, ignoring case of first letter
		end
	end
	return link, customary
end

local function make_link(data)
	-- Return a function which, when called, stores a table that defines a
	-- link exception. The table is stored in data (also a table).
	local links_index = {}  -- to detect attempts to define a link twice
	return function (utype, fields)
		-- Store a table defining a unit.
		-- This is for a unit such as 'kg' that has a linked article
		-- different from what is defined for the base unit ('g').
		-- Throw an error if a problem occurs.
		local ucode = fields[1]
		local link = clean_link(fields[2])
		if empty(ucode) then
			error('Missing unit code for a link.', 0)
		end
		if empty(link) then
			error('No link defined for unit "' .. ucode .. '".', 0)
		end
		if #fields ~= 2 then
			error('Row for unit "' .. ucode .. '" link should have two fields only.', 0)
		end
		local unit_table = get_unit(ucode)
		if unit_table == nil then
			error('Unit code "' .. ucode .. '" for a link is not defined.', 0)
		end
		if utype ~= unit_table.utype then
			error('Link exception "' .. ucode .. '" has wrong type.', 0)
		end
		local symbol = unit_table.symbol
		if empty(symbol) then
			error('Unit code "' .. ucode .. '" for a link must have a symbol.', 0)
		end
		if links_index[ucode] then
			error('Link exception "' .. ucode .. '" is already defined.', 0)
		end
		links_index[ucode] = link
		table.insert(data, { symbol = symbol, link = link })
	end
end

local function clean_scale(scale)
	-- Return cleaned scale as a string, after evaluating any expression.
	-- It would be better to retain scale expressions like "5/9" so that
	-- the expression is evaluated on the server and maintains the full
	-- resolution of the server. However, there are many such expressions
	-- in the table of all units, and it seems pointless to require the
	-- server to evaluate all of them just to do one convert.
	if empty(scale) then
		error('Missing scale.', 0)
	end
	assert(type(scale) == 'string', 'Bug: scale has an unexpected type')
	scale = string.gsub(scale, ',', '')  -- remove comma separators
	if tonumber(scale) then  -- not an expression
		return scale
	end
	local status, value = pcall(evaluate, scale)
	if not (status and type(value) == 'number') then
		error('Scale expression is invalid: "' .. scale .. '".', 0)
	end
	local result = string.format('%.17g', value)
	if result:find(bad_number_char, 1, true) then
		error('Scale expression gives an invalid value: "' .. scale .. '".', 0)
	end
	-- Omit redundant zeros from results like '1.2e-005'.
	-- Do not bother looking for results like '1.2e+005' as none occur in practice.
	local lhs, zeros, rhs = result:match('^(.-e%-)(0+)(.*)')
	if zeros then
		result = lhs .. rhs
	end
	return result
end

local function add_alias_optional_fields(unit, start, fields, target)
	-- Inspect fields[i] for i = start, start+1 ..., and extract any
	-- definitions appropriate for an alias or "per", and add them to unit.
	-- For an alias, target is a valid unit; for a "per", target is nil.
	-- Throw error if encounter an invalid entry.
	for i = start, #fields do
		local field = fields[i]
		if not empty(field) then
			local lhs, rhs = field:match('^%s*(.-)%s*=%s*(.-)%s*$')
			local good
			if not empty(rhs) then
				for _, item in ipairs({ 'sp', 'default', 'link', 'multiplier', 'symbol', 'symlink' }) do
					if lhs == item then
						if item == 'sp' then
							if rhs == 'us' then
								unit.sp_us = true
								good = true
							end
						elseif item == 'link' then
							local tlink
							if target then
								tlink = target[item]
							end
							local link, customary = clean_link(rhs, tlink)
							if link then
								unit[item] = link
							end
							if customary then
								unit.customary = customary
							end
							good = true
						elseif item == 'symlink' then
							local pos1 = rhs:find('[[', 1, true)
							local pos2 = rhs:find(']]', 1, true)
							if not (pos1 and pos2 and (pos1 < pos2)) then
								error('Alias "' .. unit.unitcode .. '" must include a wikilink ("[[...]]") in the symlink text.', 0)
							end
							unit.symlink = rhs
							good = true
						elseif item == 'multiplier' then
							if not tonumber(rhs) then
								error('Alias "' .. unit.unitcode .. '" has multiplier "' .. rhs .. '" which is not a number.', 0)
							end
							unit[item] = rhs
							good = true
						else
							if target and rhs == target[item] then
								error('Should omit "' .. item .. '" for alias "' .. unit.unitcode .. '" because it is the same as its target.', 0)
							end
							unit[item] = rhs
							good = true
						end
						break
					end
				end
			end
			if not good then
				error('Alias has invalid text in field "' .. field .. '".', 0)
			end
		end
	end
end

local function make_alias(fields, ucode, utype, symbol)
	-- Return a new alias unit, or return nil if symbol is not already
	-- defined as the unit code of the target unit.
	-- Throw an error if invalid.
	local target = get_unit(symbol)
	if not target then
		return nil
	end
	local unit = { unitcode = ucode, utype = utype, target = symbol }
	add_alias_optional_fields(unit, 3, fields, target)
	if alias_index[ucode] then
		error('Alias "' .. ucode .. '" already defined.', 0)
	else
		alias_index[ucode] = unit
	end
	if target.utype ~= utype then
		error('Target of alias "' .. ucode .. '" has wrong type.', 0)
	end
	return unit
end

local function make_per(fields, ucode, utype, symbol)
	-- Return a new "per" unit, or return nil if symbol is not of form "x/y".
	-- Throw an error if invalid.
	-- The top, bottom unit codes are checked later, after all units are defined.
	local top, bottom = symbol:match('^(.-)/(.*)$')
	if not top then
		return nil
	end
	local unit = { unitcode = ucode, utype = utype, per = { strip(top), strip(bottom) } }
	add_alias_optional_fields(unit, 3, fields)
	if per_index[ucode] then
		error('Per unit "' .. ucode .. '" already defined.', 0)
	else
		per_index[ucode] = unit
	end
	return unit
end

local function make_unit(data)
	-- Return a function which, when called, stores a table that defines a
	-- single unit. The table is stored in data (also a table).
	local fieldnames = {
		-- Fields in the Conversions section are assumed to be in the following order.
		'unitcode',
		'symbol',
		'sym_us',
		'scale',
		'extra',
		'name1',
		'name2',
		'name1_us',
		'name2_us',
		'prefixes',
		'default',
		'link',
	}
	return function (utype, fields)
		-- Store a table defining a unit.
		-- Throw an error if a problem occurs.
		local ucode, symbol = fields[1], fields[2]
		if empty(utype) then
			error('Missing unit type.', 0)
		end
		if empty(ucode) then
			error('Missing unit code.', 0)
		end
		if empty(symbol) then
			error('Missing symbol.', 0)
		end
		local prefix = symbol:sub(1, 1)
		if prefix == '~' or prefix == '=' or prefix == '!' or prefix == '*' then
			if symbol:sub(1, 2) == '==' then
				prefix = symbol:sub(1, 2)
			end
			symbol = strip(symbol:sub(#prefix + 1))  -- omit prefix and any following whitespace
			fields[2] = symbol
		else
			prefix = nil  -- not a valid prefix
		end
		if prefix == '=' or prefix == '==' then
			-- ucode is an alias (a fake unit code used in a convert template), or
			-- defines a "per" unit like "$/acre" or "BTU/h".
			-- For an alias, symbol is the unit code of the actual unit.
			-- For a "per", symbol is of form "x/y" where x and y are unit codes,
			-- or x is a recognized currency symbol and y is a unit code.
			-- Checking that x and y are valid is deferred until all units have
			-- been defined so, for example, "BTU/h" can be defined before "h".
			local unit
			if prefix == '=' then
				unit = make_alias(fields, ucode, utype, symbol)
			else
				unit = make_per(fields, ucode, utype, symbol)
			end
			if not unit then
				-- Do not define an alias in terms of another alias.
				error('Primary unit must be defined before alias =' .. symbol, 0)
			end
			insert_unique_unit(data, unit, units_index)
			return
		elseif prefix == '!' then
			-- ucode may be incorrectly entered as a unit code.
			-- symbol is a message saying what unit code should be used.
			local unit = { unitcode = ucode, shouldbe = symbol }
			insert_unique_unit(data, unit, nil)
			return
		end
		-- Make the unit.
		local unit = { utype = utype }
		for i, name in ipairs(fieldnames) do
			if not empty(fields[i]) then
				unit[name] = fields[i]
			end
		end
		-- Remove redundancy from unit.
		if unit.sym_us == symbol then
			unit.sym_us = nil
		end
		local prefixes = unit.prefixes
		if empty(prefixes) then
			prefixes = nil
		end
		local name1, name2 = unit.name1, unit.name2
		if name1 then
			if name1 == symbol and not prefixes then
				-- A unit which takes an SI prefix must not have a nil name because,
				-- for example, the name for "kW" = "kilo" .. "watt" (name for "W").
				-- The "not prefixes" test is needed for bn.wikipedia where the
				-- watt unit has the same name and symbol.
				unit.name1 = nil
			end
		else
			name1 = symbol
		end
		if name2 then
			if plural_suffix ~= '' and name2 == name1 .. plural_suffix then
				unit.name2 = nil
			end
		end
		local name1_us, name2_us = unit.name1_us, unit.name2_us
		if name1_us then
			if name1_us == name1 then
				unit.name1_us = nil
			end
		else
			name1_us = name1
		end
		if name2_us then
			if plural_suffix ~= '' and name2_us == name1_us .. plural_suffix then
				unit.name2_us = nil
			end
		end
		-- Other changes to unit.
		unit.scale = clean_scale(unit.scale)
		local extra = unit.extra
		if not empty(extra) then
			-- Set appropriate fields for a unit that needs more than a simple
			-- multiplication by a ratio of unit scales to convert values.
			unit.iscomplex = true
			if extra == 'volume/length' then
				unit.invert = 1
			elseif extra == 'length/volume' then
				unit.invert = -1
			elseif specials.utype[utype] == 'type_temperature' then
				unit.offset = extra
			elseif extra == 'invert' then
				unit.invert = -1
			else
				unit.builtin = extra
			end
		end
		if prefix == '~' then
			-- Magic code for units like "acre" where the symbol is not really a
			-- symbol, and output should use the singular or plural name instead.
			unit.usename = 1
		elseif prefix == '*' then
			-- Magic code for units like "pitch" which have a symbol that is the same as
			-- another unit with entries defined in the default or link exceptions tables.
			unit.defkey = ucode  -- key for default exceptions
			unit.linkey = ucode  -- key for link exceptions
		end
		local name_for_link
		if prefixes then
			-- If set name_for_link = name1 for a prefixed unit like g, then the
			-- link is "kilogram" for kg, and "yottagram" for Yg, and so on for all
			-- prefixes. That might be good for some units, but not all.
			if prefixes == 'SI' then
				unit.prefixes = 1
			elseif prefixes == 'SI2' then
				unit.prefixes = 2
			elseif prefixes == 'SI3' then
				unit.prefixes = 3
			else
				error('Unknown prefix: "' .. prefixes .. '".', 0)
			end
		else
			name_for_link = name1
		end
		unit.link, unit.customary = clean_link(unit.link, name_for_link)
		if prefixes then
			local name1, name1_us = unit.name1, unit.name1_us  -- after redundancy removed
			if name1 == nil then
				error('Unit with Prefix set must include Name.', 0)
			end
			if unit.name2 or unit.name2_us then
				error('Unit with Prefix set must have plural name that is Name + "s".', 0)
			end
			if unit.sym_us then
				error('Unit with Prefix set must have same Symbol and US symbol.', 0)
			end
			local pos = name1:find('%s', 1, true)
			local pos_us
			if name1_us then
				pos_us = name1_us:find('%s', 1, true)
			else
				pos_us = pos
			end
			if pos ~= pos_us then
				-- The only cases with "%s" are "square %smetre" and "cubic %smetre" (or "meter" for US),
				-- so do not bother having a procedure to handle different positions of "%s".
				error('Unit with Prefix set and "%s" in Name and US name, must have the "%s" at the same position.', 0)
			end
			if pos then
				if pos == 1 then
					-- Omit leading "%s"; convert will assume position = 1 since not stored.
					unit.name1 = name1:sub(3)
					if name1_us then
						unit.name1_us = name1_us:sub(3)
					end
				else
					-- Omit "%s" and store its position.
					unit.name1 = name1:sub(1, pos - 1) .. name1:sub(pos + 2)
					if name1_us then
						unit.name1_us = name1_us:sub(1, pos - 1) .. name1_us:sub(pos + 2)
					end
					unit.prefix_position = pos
				end
			end
			for _, name in ipairs({ 'symbol', 'name1', 'name1_us' }) do
				unit['_' .. name] = unit[name]
				unit[name] = nil  -- force call to __index metamethod so any SI prefix can be handled
			end
		end
		for name, v in pairs(unit) do
			-- Reject if a string field includes "%s" (should never occur after above).
			if type(v) == 'string' and v:find('%s', 1, true) then
				error('Field "' .. name .. '" must not contain "%s".', 0)
			end
		end
		insert_unique_unit(data, unit, units_index)
	end
end

local function make_combination(data)
	-- Return a function which, when called, stores a table that defines a
	-- single combination unit. The table is stored in data (also a table).
	return function (utype, fields)
		-- Store a table defining a unit.
		-- This is for a combination unit that specifies more than one output.
		-- The target units must be defined first.
		-- Throw an error if a problem occurs.
		local unit = { utype = utype, combination = {} }
		for i, v in ipairs(fields) do
			if i == 1 then  -- unitcode
				if v == '' then
					error('Missing unit code for a combination.', 0)
				end
				unit.unitcode = v
			elseif v == '' then
				-- Ignore empty fields.
			else
				local target = get_unit(v)
				if target == nil then
					error('Unit "' .. v .. '" in combination "' .. unit.unitcode .. '" not defined.', 0)
				end
				if target.utype ~= utype then
					error('Unit "' .. v .. '" in combination "' .. unit.unitcode .. '" has wrong type.', 0)
				end
				table.insert(unit.combination, v)
			end
		end
		if #unit.combination < 2 then
			local msg
			if #unit.combination == 0 then
				msg = 'No units specified for combination "' .. unit.unitcode .. '"'
			else
				msg = 'Only one unit specified for combination "' .. unit.unitcode .. '"'
			end
			error(msg, 0)
		end
		insert_unique_unit(data, unit, units_index)
	end
end

local function reversed(t)
	-- Return a numbered table in reverse order.
	local reversed, count = {}, #t
	for i = 1, count do
		reversed[i] = t[count + 1 - i]
	end
	return reversed
end

local function make_inputmultiple(data)
	-- Return a function which, when called, stores a table that defines a
	-- single composite (multiple input) unit. The table is stored in data (also a table).
	return function (utype, fields)
		-- Set or update an entry in the data table to record that a unit
		-- accepts subdivisions to make a composite input unit like '|2|ft|6|in'.
		-- The target units must be defined first.
		-- Throw an error if a problem occurs.
		local unitcode  -- dummy code required for simplicity, but which is not used in output
		local alternate_code  -- an alternative unit code can be specified to replace convert input
		local fixed_name  -- a fixed name can be specified to replace the unit's normal symbol/name
		local default_code
		local ucodes, scales = {}, {}
		for i, v in ipairs(fields) do
			-- 1=composite, 2=ucode1, 3=ucode2, 4=default, 5=alternate, 6=name
			if i == 1 then
				if v == '' then
					error('Missing unit code for a composite.', 0)
				end
				unitcode = v
			elseif 2 <= i and i <= 5 then
				if not (i == 5 and v == '') then
					local target = get_unit(v)
					if target == nil then
						error('Unit "' .. v .. '" in composite "' .. unitcode .. '" not defined.', 0)
					end
					if target.utype ~= utype then
						error('Unit "' .. v .. '" in composite "' .. unitcode .. '" has wrong type.', 0)
					end
					if i < 4 then
						table.insert(ucodes, v)
						table.insert(scales, target.scale)
					elseif i == 4 then
						default_code = v
					else
						if scales[#scales] ~= target.scale then
							error('Alternate unit "' .. v .. '" in composite "' .. unitcode .. '" has wrong scale.', 0)
						end
						alternate_code = v
					end
				end
			elseif i == 6 then
				if v ~= '' then
					fixed_name = v
				end
			else
				error('Composite "' .. unitcode .. '" has too many fields.', 0)
			end
		end
		if #ucodes ~= 2 then
			error('Composite "' .. unitcode .. '" must specify exactly two unit codes.', 0)
		end
		if default_code == nil then
			error('Composite "' .. unitcode .. '" must specify a default unit code.', 0)
		end
		-- Component units must be specified from most-significant to least-significant,
		-- and each ratio of a pair of scales must be very close to an integer.
		-- Currently, there will be exactly two scales and one ratio.
		local ratios, count = {}, #scales
		for i = 1, count do
			local scale = tonumber(scales[i])
			if scale == nil or scale <= 0 then
				error('Composite "' .. unitcode .. '" has a component with an invalid scale, "' .. scales[i] .. '".', 0)
			end
			scales[i] = scale
		end
		for i = 1, count - 1 do
			local ratio = scales[i] / scales[i + 1]
			local rounded = math.floor(ratio + 0.5)
			if rounded < 2 then
				error('Composite "' .. unitcode .. '" has components in wrong order or with invalid scales.', 0)
			end
			if math.abs(ratio - rounded)/ratio > 1e-6 then
				error('Composite "' .. unitcode .. '" has components where scale ratios are not integers.', 0)
			end
			ratios[i] = rounded
		end
		local text = { tostring(ratios[1]) }
		local function add_text(key, value)
			table.insert(text, string.format('%s = %q', key, value))
		end
		if default_code then
			add_text('default', default_code)
		end
		if alternate_code then
			add_text('unit', alternate_code)
		end
		if fixed_name then
			add_text('name', fixed_name)
		end
		local subdiv = string.format('["%s"] = { %s }', ucodes[2], table.concat(text, ', '))
		local main_code = ucodes[1]
		local item = data[main_code]
		if item then
			table.insert(item.subdivs, subdiv)
		else
			data[main_code] = { subdivs = { subdiv } }
		end
	end
end

local function make_outputmultiple(data)
	-- Return a function which, when called, stores a table that defines a
	-- single multiple output unit. The table is stored in data (also a table).
	return function (utype, fields)
		-- Store a table defining a unit.
		-- This is for a multiple unit like 'ydftin' (result in yards, feet, inches).
		-- The target units must be defined first.
		-- Throw an error if a problem occurs.
		local unit = { utype = utype }
		local ucodes, scales = {}, {}
		for i, v in ipairs(fields) do
			if i == 1 then  -- unitcode
				if v == '' then
					error('Missing unit code for a multiple.', 0)
				end
				unit.unitcode = v
			elseif v == '' then
				-- Ignore empty fields.
			else
				local target = get_unit(v)
				if target == nil then
					error('Unit "' .. v .. '" in multiple "' .. unit.unitcode .. '" not defined.', 0)
				end
				if target.utype ~= utype then
					error('Unit "' .. v .. '" in multiple "' .. unit.unitcode .. '" has wrong type.', 0)
				end
				table.insert(ucodes, v)
				table.insert(scales, target.scale)
			end
		end
		if #ucodes < 2 then
			local msg
			if #ucodes == 0 then
				msg = 'No units specified for multiple "' .. unit.unitcode .. '"'
			else
				msg = 'Only one unit specified for multiple "' .. unit.unitcode .. '"'
			end
			error(msg, 0)
		end
		-- Component units must be specified from most-significant to least-significant
		-- (so scale values will be in descending order),
		-- and each ratio of a pair of scales must be very close to an integer.
		-- The componenets and ratios are stored in reverse order (least significant first).
		-- This script stores a unit scale as a string (might be an expression like "5/9"),
		-- but scales in a multiple are handled as numbers (should never be expressions).
		local ratios, count = {}, #scales
		for i = 1, count do
			local scale = tonumber(scales[i])
			if scale == nil or scale <= 0 then
				error('Multiple "' .. unit.unitcode .. '" has a component with an invalid scale, "' .. scales[i] .. '".', 0)
			end
			scales[i] = scale
		end
		for i = 1, count - 1 do
			local ratio = scales[i] / scales[i + 1]
			local rounded = math.floor(ratio + 0.5)
			if rounded < 2 then
				error('Multiple "' .. unit.unitcode .. '" has components in wrong order or with invalid scales.', 0)
			end
			if math.abs(ratio - rounded)/ratio > 1e-6 then
				error('Multiple "' .. unit.unitcode .. '" has components where scale ratios are not integers.', 0)
			end
			ratios[i] = rounded
		end
		unit.combination = reversed(ucodes)
		unit.multiple = reversed(ratios)
		insert_unique_unit(data, unit, units_index)
	end
end

-- To make updating Module:Convert/data easier, this script inserts a preamble
-- and a postamble so the result can be used to replace the whole page.
local data_preamble = [=[
-- Conversion data used by [[Module:Convert]] which uses mw.loadData() for
-- read-only access to this module so that it is loaded only once per page.
--
-- These data tables follow:
--   all_units           all properties for a unit, including default output
--   default_exceptions  exceptions for default output ('kg' and 'g' have different defaults)
--   link_exceptions     exceptions for links ('kg' and 'g' have different links)
--
-- These tables are generated by a script which reads the wikitext of a page that
-- documents the required properties of each unit; see [[Module:Convert/doc]].
]=]

local data_postamble = [=[
return {
	all_units = all_units,
	default_exceptions = default_exceptions,
	link_exceptions = link_exceptions,
}]=]

local out_unit_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local all_units = {]]

local out_unit_suffix = [[
}
]]

local out_default_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local default_exceptions = {
	-- Prefixed units with a default different from that of the base unit.
	-- Each key item is a prefixed symbol (unitcode for engineering notation).]]

local out_default_suffix = [[
}
]]

local out_default_item = [[
	["{symbol}"] = "{default}",]]

local out_link_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local link_exceptions = {
	-- Prefixed units with a linked article different from that of the base unit.
	-- Each key item is a prefixed symbol (not unitcode).]]

local out_link_suffix = [[
}
]]

local out_link_item = [[
	["{symbol}"] = "{link}",]]

local combination_specification = {     -- pure combination like 'm ft', or a multiple like 'ftin'
	'combination',
	'multiple',
	'utype',
}

local alias_specification = {
	'target',
	'symbol',
	'sp_us',
	'default',
	'link',
	'symlink',
	'customary',
	'multiplier',
}

local per_specification = {
	'per',
	'symbol',
	'sp_us',
	'utype',
	'invert',
	'iscomplex',
	'default',
	'link',
	'symlink',
	'customary',
	'multiplier',
}

local shouldbe_specification = {
	'shouldbe',
}

local unit_specification = {
	'_name1',
	'_name1_us',
	'_symbol',
	'prefix_position',
	'name1',
	'name1_us',
	'name2',
	'name2_us',
	'symbol',
	'sym_us',
	'usename',
	'usesymbol',
	'utype',
	'alttype',
	'builtin',
	'scale',
	'offset',
	'invert',
	'iscomplex',
	'istemperature',
	'exception',
	'prefixes',
	'default',
	'subdivs',
	'defkey',
	'linkey',
	'link',
	'customary',
	'sp_us',
}

local no_quotes = {
	combination = true,
	customary = true,
	multiple = true,
	multiplier = true,
	offset = true,
	per = true,
	prefix_position = true,
	scale = true,
	subdivs = true,
}

local function add_unit_lines(results, unit, spec)
	-- Add lines of Lua source to define a unit to the results collection.
	local function add_line(line)
		-- Had planned to replace sequences of spaces with 4-column tabs here
		-- (because the CodeEditor now assumes the use of such tabs).
		-- However, 4-column tabs are only visible when editing Module:Convert
		-- with browser scripting and the CodeEditor enabled, and that is rare.
		-- Module:Convert would usually be viewed (with 8-column tabs), and some
		-- indents would be messed up unless 8-column tabs are used. Therefore,
		-- have decided to simply replace 8 spaces at start of line with a single
		-- tab which reduces the size of the module, and is correct for viewing.
		if line:sub(1, 8) == string.rep(' ', 8) then
			line = '\t' .. line:sub(9)
		end
		results:add(line)
	end
	local first_item = '    ["' .. unit.unitcode .. '"] = {'
	local last_item  = '    },'
	add_line(first_item)
	for _, k in ipairs(spec) do
		local v = unit[k]
		if v then
			local want_quotes = (type(v) == 'string' and not no_quotes[k])
			if type(v) == 'boolean' then
				v = tostring(v)
			elseif type(v) == 'number' or k == 'scale' then
				-- Replace results like '1e-006' with '1e-6'.
				v = string.gsub(tostring(v), '(e[+-])0+([1-9].*)', '%1%2', 1)
			elseif type(v) ~= 'string' then
				error('Fatal error: unknown data type for ' .. unit.unitcode)
			end
			local fmt = string.format('%8s%%-9s= %%%s,', '', want_quotes and 'q' or 's')
			add_line(fmt:format(k, v))
		end
	end
	add_line(last_item)
end

local function numbered_table_as_string(data, unit)
	local t = {}
	for _, v in ipairs(data) do
		if type(v) == 'string' then
			table.insert(t, '"' .. v .. '"')
		elseif type(v) == 'number' then
			table.insert(t, tostring(v))
		else
			error('Fatal error: unknown data type for ' .. unit.unitcode)
		end
	end
	return '{ ' .. table.concat(t, ', ') .. ' }'
end

local function extract_heading(line)
	-- Return n, s where n = heading level number (nil if none), and
	-- s = heading text (with leading/trailing whitespace removed).
	local pattern = '^(==+)%s*(.-)%s*(==+)%s*$'
	local before, heading, after = line:match(pattern)
	if heading and #heading > 0 then
		-- Don't bother checking if before == after.
		return #before, heading
	end
end

local function fields(line)
	-- Return a numbered table of fields split from line.
	-- Items are delimited by "||".
	-- Each item has leading/trailing whitespace removed, and any encoded pipe
	-- characters are decoded.
	-- The second field (for symbol when processing units) is adjusted to
	-- remove any "colspan" at the front of lines like:
	-- "| unitcode || colspan="11" | !Text to display for an error message".
	local t = {}
	line = line .. "||"  -- to get last field
	for item in line:gmatch("%s*(.-)%s*||") do
		table.insert(t, (item:gsub('&#124;', '|')))
	end
	local cleaned = t[2]:match('^%s*colspan%s*=.-|%s*(.*)$')
	if cleaned then
		t[2] = cleaned
	end
	return t
end

local function prepare_section(maker, lines, section, maxerrors, is_sandbox)
	-- Process the first level-two section with the given section name
	-- in the given table of lines of wikitext.
	-- If successful, maker inserts each item into a table.
	-- Otherwise, an error is thrown.
	local skip = true
	local errors = collection()
	local utype  -- unit type (from level-three heading)
	local nbsp = '\160'  -- nonbreaking space is utf-8 encoded as hex c2 a0
	for linenumber, line in ipairs(lines) do
		if skip then
			-- Skip down to and including the starting heading.
			local level, heading = extract_heading(line)
			if level == 2 and heading == section then
				skip = false
			end
		else
			-- Accummulate unit definitions.
			local c1 = line:sub(1, 1)
			local c2 = line:sub(2, 2)
			if c1 == '|' and not (c2 == '-' or c2 == '}') then
				if empty(utype) then
					error('No level 3 heading before: ' .. line, 0)
				end
				if line:find(nbsp, 1, true) then
					-- For example, "acre ft" does not work if it contains nbsp.
					add_warning('Line ' .. linenumber .. ' contains a nonbreaking space')
				end
				local ok, msg = pcall(maker, utype, fields(line:sub(2)))
				if not ok then
					if msg:sub(-1) == '.' then msg = msg:sub(1, -2) end
					errors:add(msg .. ' (line ' .. linenumber .. ').')
					if errors.n >= maxerrors then
						break
					end
				end
			else
				local level, heading = extract_heading(line)
				if level == 3 then
					utype = heading:lower()
				elseif level == 2 then
					break
				end
			end
		end
	end
	if skip and not is_sandbox then
		error('Level 2 heading "' .. section ..'" not found.', 0)
	end
	if errors.n > 0 then
		error(errors:join(), 0)
	end
end

local function get_page_lines(page_title)
	-- Read the wikitext of the page at the given title; split the text into
	-- lines with leading and trailing space removed from each line.
	-- Return a numbered table of the lines, or throw an error.
	if empty(page_title) then
		error('Need title of page with unit definitions.', 0)
	end
	local t = mw.title.new(page_title)
	if t then
		local content = t:getContent()
		if content then
			if content:sub(-1) ~= '\n' then
				content = content .. '\n'
			end
			local lines = collection()
			for line in string.gmatch(content, '[\t ]*(.-)[\t\r ]*\n') do
				lines:add(line)
			end
			return lines
		end
	end
	error('Could not read wikitext from "[[' .. page_title .. ']]".', 0)
end

local function prepare_data(conversion_data_title, maxerrors, is_sandbox)
	-- Read the page of conversion data, and process the wikitext
	-- in the sections with wanted level-two headings.
	-- Return units, defaults, links (three tables).
	-- Throw an error if a problem occurs.
	local composites, defaults, links, units = {}, {}, {}, {}
	local sections = {
		-- LATER: Section names must be in English to match following text.
		{ 'Overrides'       , make_override      , overrides  },
		{ 'Conversions'     , make_unit          , units      },
		{ 'Output multiples', make_outputmultiple, units      },
		{ 'Combinations'    , make_combination   , units      },
		{ 'Input multiples' , make_inputmultiple , composites },  -- after all units defined so default will be defined
		{ 'Defaults'        , make_default       , defaults   },
		{ 'Links'           , make_link          , links      },
	}
	local lines = get_page_lines(conversion_data_title)
	for _, section in ipairs(sections) do
		local heading = section[1]
		local maker = section[2](section[3])
		prepare_section(maker, lines, heading, maxerrors, is_sandbox)
	end
	check_all_defaults(units, maxerrors)
	check_all_pers(units, maxerrors)
	update_units(units, composites)
	return units, defaults, links
end

local function _makeunits(results, conversion_data_title, text_module_title, is_sandbox)
	-- Read the wikitext for the conversion data.
	-- Append output to given results collection, or throw error if a problem.
	local text_code = require(text_module_title)
	SIprefixes = text_code.SIprefixes
	eng_scales = text_code.eng_scales
	local translation = text_code.translation_table
	if translation then
		if translation.plural_suffix then
			plural_suffix = translation.plural_suffix
		end
		local ts = translation.specials
		if ts then
			if ts.utype then
				specials.utype = ts.utype
			end
			if ts.ucode then
				specials.ucode = ts.ucode
			end
		end
	end
	local units, defaults, links = prepare_data(conversion_data_title, 20, is_sandbox)
	if data_preamble then
		results:add(data_preamble)
	end
	results:add(out_unit_prefix)
	for _, unit in ipairs(units) do
		local spec
		if unit.target then
			spec = alias_specification
		elseif unit.per then
			spec = per_specification
			unit.per = numbered_table_as_string(unit.per, unit)
		elseif unit.shouldbe then
			spec = shouldbe_specification
		elseif unit.combination then
			spec = combination_specification
			unit.combination = numbered_table_as_string(unit.combination, unit)
			if unit.multiple then
				unit.multiple = numbered_table_as_string(unit.multiple, unit)
			end
		else
			spec = unit_specification
		end
		add_unit_lines(results, unit, spec)
	end
	results:add(out_unit_suffix)
	if #defaults > 0 or not is_sandbox then
		results:add(out_default_prefix)
		for _, unit in ipairs(defaults) do
			results:add((out_default_item:gsub('{([%w_]+)}', unit)))
		end
		results:add(out_default_suffix)
	end
	if #links > 0 or not is_sandbox then
		results:add(out_link_prefix)
		for _, unit in ipairs(links) do
			results:add((out_link_item:gsub('{([%w_]+)}', unit)))
		end
		results:add(out_link_suffix)
	end
	if data_postamble then
		results:add(data_postamble)
	end
end

local function makeunits(frame)
	local is_sandbox
	local args = frame.args
	local conversion_data_title = 'Module:Convert/documentation/conversion data/doc'
	local text_module_title = args[2] or 'Module:Convert/text'
	local title = args[1]
	if title and conversion_data_title ~= title then
		conversion_data_title = title
		is_sandbox = true
		data_preamble = nil
		data_postamble = nil
		out_unit_prefix = 'local all_units = {'
		out_unit_suffix = '}'
		out_default_prefix = '\nlocal default_exceptions = {'
		out_default_suffix = '}'
		out_default_item = '\t["{symbol}"] = "{default}",'
		out_link_prefix = '\nlocal link_exceptions = {'
		out_link_suffix = '}'
		out_link_item = '\t["{symbol}"] = "{link}",'
	end
	local results = collection()
	local ok, msg = pcall(_makeunits, results, conversion_data_title, text_module_title, is_sandbox)
	if not ok then
		results:add('Error:\n')
		results:add(msg)
	end
	local warn = ''
	if warnings.n > 0 then
		warn = 'Warning:\n\n' .. warnings:join() .. '\n\n'
	end
	-- Pre tags returned by a module are html tags, not like wikitext <pre>...</pre>.
	-- The following renders the text as is, and preserves tab characters.
	return '<pre>\n' .. mw.text.nowiki(warn .. results:join()) .. '\n</pre>\n'
end

return { makeunits = makeunits }