You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

102 lines
3.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from typing import Any, Callable, Dict, Optional, Tuple, TypeVar, Union, cast
from ..config import registry
from ..initializers import uniform_init
from ..model import Model
from ..types import Floats1d, Floats2d, Ints1d, Ints2d
from ..util import partial
from .array_getitem import ints_getitem
from .chain import chain
InT = TypeVar("InT", bound=Union[Ints1d, Ints2d])
OutT = Floats2d
@registry.layers("HashEmbed.v1")
def HashEmbed(
nO: int,
nV: int,
*,
seed: Optional[int] = None,
column: Optional[int] = None,
initializer: Optional[Callable] = None,
dropout: Optional[float] = None
) -> Model[InT, OutT]:
"""
An embedding layer that uses the “hashing trick” to map keys to distinct values.
The hashing trick involves hashing each key four times with distinct seeds,
to produce four likely differing values. Those values are modded into the
table, and the resulting vectors summed to produce a single result. Because
its unlikely that two different keys will collide on all four “buckets”,
most distinct keys will receive a distinct vector under this scheme, even
when the number of vectors in the table is very low.
"""
attrs: Dict[str, Any] = {"column": column, "seed": seed}
if initializer is None:
initializer = uniform_init
if dropout is not None:
attrs["dropout_rate"] = dropout
model: Model = Model(
"hashembed",
forward,
init=partial(init, initializer),
params={"E": None},
dims={"nO": nO, "nV": nV, "nI": None},
attrs=attrs,
)
if seed is None:
model.attrs["seed"] = model.id
if column is not None:
# This is equivalent to array[:, column]. What you're actually doing
# there is passing in a tuple: array[(:, column)], except in the context
# of array indexing, the ":" creates an object slice(0, None).
# So array[:, column] is array.__getitem__(slice(0), column).
model = chain(ints_getitem((slice(0, None), column)), model)
model.attrs["column"] = column
return cast(Model[InT, OutT], model)
def forward(
model: Model[Ints1d, OutT], ids: Ints1d, is_train: bool
) -> Tuple[OutT, Callable]:
vectors = cast(Floats2d, model.get_param("E"))
nV = vectors.shape[0]
nO = vectors.shape[1]
if len(ids) == 0:
output: Floats2d = model.ops.alloc2f(0, nO, dtype=vectors.dtype)
else:
ids = model.ops.as_contig(ids, dtype="uint64")
nN = ids.shape[0]
seed: int = model.attrs["seed"]
keys = model.ops.hash(ids, seed) % nV
output = model.ops.gather_add(vectors, keys)
drop_mask = None
if is_train:
dropout: Optional[float] = model.attrs.get("dropout_rate")
drop_mask = cast(Floats1d, model.ops.get_dropout_mask((nO,), dropout))
if drop_mask is not None:
output *= drop_mask
def backprop(d_vectors: OutT) -> Ints1d:
if drop_mask is not None:
d_vectors *= drop_mask
dE = model.ops.alloc2f(*vectors.shape)
keysT = model.ops.as_contig(keys.T, dtype="i")
for i in range(keysT.shape[0]):
model.ops.scatter_add(dE, keysT[i], d_vectors)
model.inc_grad("E", dE)
dX = model.ops.alloc1i(nN)
return dX
return output, backprop
def init(
initializer: Callable,
model: Model[Ints1d, OutT],
X: Optional[Ints1d] = None,
Y: Optional[OutT] = None,
) -> None:
E = initializer(model.ops, (model.get_dim("nV"), model.get_dim("nO")))
model.set_param("E", E)