diff --git a/cirkit/backend/torch/circuits.py b/cirkit/backend/torch/circuits.py index 3fe5299d..89996d71 100644 --- a/cirkit/backend/torch/circuits.py +++ b/cirkit/backend/torch/circuits.py @@ -54,10 +54,16 @@ def lookup( if in_graph is None: yield layer, () continue - # in_graph: An input batch (assignments to variables) of shape (B, C, D) + # in_graph: An input batch (assignments to variables) of shape (B, D) # scope_idx: The scope of the layers in each fold, a tensor of shape (F, D'), D' < D - # x: (B, C, D) -> (B, C, F, D') -> (F, C, B, D') - x = in_graph[..., layer.scope_idx].permute(2, 1, 0, 3) + # x: (B, D) -> (B, F, D') -> (F, B, D') + if len(in_graph.shape) != 2: + raise ValueError( + "The input to the circuit should have shape (B, D), " + "where B is the batch size and D is the number of variables " + "the circuit is defined on" + ) + x = in_graph[..., layer.scope_idx].permute(1, 0, 2) yield layer, (x,) continue @@ -121,7 +127,6 @@ class AbstractTorchCircuit(TorchDiAcyclicGraph[TorchLayer]): def __init__( self, scope: Scope, - num_channels: int, layers: Sequence[TorchLayer], in_layers: dict[TorchLayer, Sequence[TorchLayer]], outputs: Sequence[TorchLayer], @@ -133,7 +138,6 @@ def __init__( Args: scope: The variables scope. - num_channels: The number of channels per variable. layers: The sequence of layers. in_layers: A dictionary mapping layers to their inputs, if any. outputs: A list of output layers. @@ -148,7 +152,6 @@ def __init__( fold_idx_info=fold_idx_info, ) self._scope = scope - self._num_channels = num_channels self._properties = properties @property @@ -169,15 +172,6 @@ def num_variables(self) -> int: """ return len(self.scope) - @property - def num_channels(self) -> int: - """Retrieve the number of channels of each variable. - - Returns: - The number of variables. - """ - return self._num_channels - @property def properties(self) -> StructuralProperties: """Retrieve the structural properties of the circuit. 
@@ -272,8 +266,8 @@ def forward(self, x: Tensor) -> Tensor: following the topological ordering. Args: - x: The tensor input of the circuit, with shape $(B, C, D)$, where B is the batch size, - $C$ is the number of channels, and $D$ is the number of variables. + x: The tensor input of the circuit, with shape $(B, D)$, where B is the batch size, + and $D$ is the number of variables. Returns: Tensor: The tensor output of the circuit, with shape $(B, O, K)$, diff --git a/cirkit/backend/torch/compiler.py b/cirkit/backend/torch/compiler.py index d42266ce..2711813a 100644 --- a/cirkit/backend/torch/compiler.py +++ b/cirkit/backend/torch/compiler.py @@ -228,7 +228,6 @@ def _compile_circuit(self, sc: Circuit) -> AbstractTorchCircuit: layers = list(compiled_layers_map.values()) cc = cc_cls( sc.scope, - sc.num_channels, layers=layers, in_layers=in_layers, outputs=outputs, @@ -275,7 +274,6 @@ def _fold_circuit(compiler: TorchCompiler, cc: AbstractTorchCircuit) -> Abstract # Instantiate a folded circuit return type(cc)( cc.scope, - cc.num_channels, layers, in_layers, outputs, @@ -507,7 +505,7 @@ def match_optimizer_fuse(match: LayerOptMatch) -> tuple[TorchLayer, ...]: if optimize_result is None: return cc, False layers, in_layers, outputs = optimize_result - cc = type(cc)(cc.scope, cc.num_channels, layers, in_layers, outputs, properties=cc.properties) + cc = type(cc)(cc.scope, layers, in_layers, outputs, properties=cc.properties) return cc, True diff --git a/cirkit/backend/torch/layers/inner.py b/cirkit/backend/torch/layers/inner.py index a4309f1b..f46f0162 100644 --- a/cirkit/backend/torch/layers/inner.py +++ b/cirkit/backend/torch/layers/inner.py @@ -276,11 +276,11 @@ def sample(self, x: Tensor) -> tuple[Tensor, Tensor]: if negative or not normalized: raise TypeError("Sampling in sum layers only works with positive weights summing to 1") - # x: (F, H, C, Ki, num_samples, D) -> (F, C, H * Ki, num_samples, D) - x = x.permute(0, 2, 1, 3, 4, 5).flatten(2, 3) - c = x.shape[1] - 
num_samples = x.shape[3] - d = x.shape[4] + # x: (F, H, Ki, num_samples, D) -> (F, H * Ki, num_samples, D) + x = x.flatten(1, 2) + + num_samples = x.shape[2] + d = x.shape[3] # mixing_distribution: (F, Ko, H * Ki) mixing_distribution = torch.distributions.Categorical(probs=weight) @@ -289,9 +289,9 @@ def sample(self, x: Tensor) -> tuple[Tensor, Tensor]: mixing_samples = mixing_distribution.sample((num_samples,)) mixing_samples = E.rearrange(mixing_samples, "n f k -> f k n") - # mixing_indices: (F, C, Ko, num_samples, D) - mixing_indices = E.repeat(mixing_samples, "f k n -> f c k n d", c=c, d=d) + # mixing_indices: (F, Ko, num_samples, D) + mixing_indices = E.repeat(mixing_samples, "f k n -> f k n d", d=d) - # x: (F, C, Ko, num_samples, D) - x = torch.gather(x, dim=2, index=mixing_indices) + # x: (F, Ko, num_samples, D) + x = torch.gather(x, dim=1, index=mixing_indices) return x, mixing_samples diff --git a/cirkit/backend/torch/layers/input.py b/cirkit/backend/torch/layers/input.py index fcca7528..bd6ef437 100644 --- a/cirkit/backend/torch/layers/input.py +++ b/cirkit/backend/torch/layers/input.py @@ -18,7 +18,6 @@ def __init__( scope_idx: Tensor, num_output_units: int, *, - num_channels: int = 1, semiring: Semiring | None = None, ) -> None: r"""Initialize a torch input layer. @@ -29,7 +28,6 @@ def __init__( on. Alternatively, a tensor of shape $(D,)$ can be specified, which will be interpreted as a tensor of shape $(1, D)$, i.e., with $F = 1$. num_output_units: The number of output units. - num_channels: The number of channels. semiring: The evaluation semiring. Defaults to [SumProductSemiring][cirkit.backend.torch.semiring.SumProductSemiring]. @@ -44,7 +42,6 @@ def __init__( super().__init__( num_variables, num_output_units, - arity=num_channels, num_folds=num_folds, semiring=semiring, ) @@ -68,15 +65,6 @@ def num_variables(self) -> int: """ return self.num_input_units - @property - def num_channels(self) -> int: - """The number of channels per variable. 
- - Returns: - The number of channels. - """ - return self.arity - @property @abstractmethod def config(self) -> Mapping[str, Any]: @@ -110,9 +98,9 @@ def sample(self, num_samples: int = 1) -> Tensor: num_samples: The number of data points to sample. Returns: - Tensor: The tensorized sample, having shape $(F, C, K, N)$, where + Tensor: The tensorized sample, having shape $(F, K, N)$, where $F$ is the number of folds, $K$ is the number of output units, - $C$ is the number of channels, and $N$ is the number of samples. + and $N$ is the number of samples. Raises: TypeError: If sampling is not supported by the layer. @@ -124,7 +112,6 @@ def extra_repr(self) -> str: " ".join( [ f"folds: {self.num_folds}", - f"channels: {self.num_channels}", f"variables: {self.num_variables}", f"output-units: {self.num_output_units}", ] @@ -148,9 +135,8 @@ def forward(self, x: Tensor) -> Tensor: r"""Invoke the forward function. Args: - x: The tensor input to this layer, having shape $(F, C, B, D)$, where $F$ - is the number of folds, $C$ is the number of channels, - $B$ is the batch size, and $D$ is the number of variables. + x: The tensor input to this layer, having shape $(F, B, D)$, where $F$ + is the number of folds, $B$ is the batch size, and $D$ is the number of variables. Returns: Tensor: The tensor output of this layer, having shape $(F, B, K)$, where $K$ @@ -208,7 +194,6 @@ def __init__( self, scope_idx: Tensor, num_output_units: int, - num_channels: int = 1, *, num_states: int = 2, weight: TorchParameter, @@ -222,10 +207,9 @@ def __init__( on. Alternatively, a tensor of shape $(D,)$ can be specified, which will be interpreted as a tensor of shape $(1, D)$, i.e., with $F = 1$. num_output_units: The number of output units. - num_channels: The number of channels. num_states: The number of states $V$ each variable can assume. 
- weight: The weight parameter of shape $(F, K, C, N)$, where $K$ is the number of output - units, $C$ is the number of channels, and $V$ is the number of states. + weight: The weight parameter of shape $(F, K, V)$, where $K$ is the number of output + units, and $V$ is the number of states. semiring: The evaluation semiring. Defaults to [SumProductSemiring][cirkit.backend.torch.semiring.SumProductSemiring]. @@ -242,7 +226,6 @@ def __init__( super().__init__( scope_idx, num_output_units, - num_channels=num_channels, semiring=semiring, ) self.num_states = num_states @@ -261,13 +244,12 @@ def _valid_weight_shape(self, p: TorchParameter) -> bool: @property def _weight_shape(self) -> tuple[int, ...]: - return self.num_output_units, self.num_channels, self.num_states + return self.num_output_units, self.num_states @property def config(self) -> Mapping[str, Any]: return { "num_output_units": self.num_output_units, - "num_channels": self.num_channels, "num_states": self.num_states, } @@ -278,18 +260,11 @@ def params(self) -> Mapping[str, TorchParameter]: def forward(self, x: Tensor) -> Tensor: if x.is_floating_point(): x = x.long() # The input to Embedding should be discrete - x = x.squeeze(dim=3) # (F, C, B) + x = x.squeeze(dim=2) # (F, B) weight = self.weight() - if self.num_channels == 1: - idx_fold = torch.arange(self.num_folds, device=weight.device) - x = weight[:, :, 0][idx_fold[:, None], :, x[:, 0]] - x = self.semiring.map_from(x, SumProductSemiring) - else: - idx_fold = torch.arange(self.num_folds, device=weight.device)[:, None, None] - idx_channel = torch.arange(self.num_channels, device=weight.device)[None, :, None] - x = weight[idx_fold, :, idx_channel, x] - x = self.semiring.map_from(x, SumProductSemiring) - x = self.semiring.prod(x, dim=1) + idx_fold = torch.arange(self.num_folds) + x = weight[idx_fold[:, None], :, x] + x = self.semiring.map_from(x, SumProductSemiring) return x # (F, B, K) @@ -340,7 +315,6 @@ def __init__( self, scope_idx: Tensor, 
num_output_units: int, - num_channels: int = 1, *, num_categories: int = 2, probs: TorchParameter | None = None, @@ -355,12 +329,11 @@ def __init__( Alternatively, a tensor of shape $(D,)$ can be specified, which will be interpreted as a tensor of shape $(1, D)$, i.e., with $F = 1$. num_output_units: The number of output units. - num_channels: The number of channels. num_categories: The number of categories for Categorical distribution. - probs: The probabilities parameter of shape $(F, K, C, V)$, where $K$ is the number of - output units, $C$ is the number of channels, and $V$ is the number of categories. - logits: The logits parameter of shape $(F, K, C, V)$, where $K$ is the number of - output units, $C$ is the number of channels, and $V$ is the number of categories. + probs: The probabilities parameter of shape $(F, K, V)$, where $K$ is the number of + output units, and $V$ is the number of categories. + logits: The logits parameter of shape $(F, K, V)$, where $K$ is the number of + output units, and $V$ is the number of categories. semiring: The evaluation semiring. Defaults to [SumProductSemiring][cirkit.backend.torch.semiring.SumProductSemiring]. 
@@ -380,7 +353,6 @@ def __init__( super().__init__( scope_idx, num_output_units, - num_channels=num_channels, semiring=semiring, ) self.num_categories = num_categories @@ -410,13 +382,12 @@ def _valid_parameter_shape(self, p: TorchParameter) -> bool: @property def _probs_logits_shape(self) -> tuple[int, ...]: - return self.num_output_units, self.num_channels, self.num_categories + return self.num_output_units, self.num_categories @property def config(self) -> Mapping[str, Any]: return { "num_output_units": self.num_output_units, - "num_channels": self.num_channels, "num_categories": self.num_categories, } @@ -429,17 +400,12 @@ def params(self) -> Mapping[str, TorchParameter]: def log_unnormalized_likelihood(self, x: Tensor) -> Tensor: if x.is_floating_point(): x = x.long() # The input to Categorical should be discrete - # x: (F, C, B, 1) -> (F, C, B) - x = x.squeeze(dim=3) - # logits: (F, K, C, N) + # x: (F, B, 1) -> (F, B) + x = x.squeeze(dim=2) + # logits: (F, K, N) logits = torch.log(self.probs()) if self.logits is None else self.logits() - if self.num_channels == 1: - idx_fold = torch.arange(self.num_folds, device=logits.device) - x = logits[:, :, 0][idx_fold[:, None], :, x[:, 0]] - else: - idx_fold = torch.arange(self.num_folds, device=logits.device)[:, None, None] - idx_channel = torch.arange(self.num_channels, device=logits.device)[None, :, None] - x = torch.sum(logits[idx_fold, :, idx_channel, x], dim=1) + idx_fold = torch.arange(self.num_folds) + x = logits[idx_fold[:, None], :, x] return x def log_partition_function(self) -> Tensor: @@ -453,8 +419,8 @@ def log_partition_function(self) -> Tensor: def sample(self, num_samples: int = 1) -> Tensor: logits = torch.log(self.probs()) if self.logits is None else self.logits() dist = distributions.Categorical(logits=logits) - samples = dist.sample((num_samples,)) # (N, F, K, C) - samples = samples.permute(1, 3, 2, 0) # (F, C, K, N) + samples = dist.sample((num_samples,)) # (N, F, K) + samples = samples.permute(1, 
2, 0) # (F, K, N) return samples @@ -471,7 +437,6 @@ def __init__( scope_idx: Tensor, num_output_units: int, *, - num_channels: int = 1, total_count: int = 1, probs: TorchParameter | None = None, logits: TorchParameter | None = None, @@ -485,12 +450,11 @@ def __init__( Alternatively, a tensor of shape $(D,)$ can be specified, which will be interpreted as a tensor of shape $(1, D)$, i.e., with $F = 1$. num_output_units: The number of output units. - num_channels: The number of channels. total_count: The number of trials. - probs: The probabilities parameter of shape $(F, K, C)$, where $K$ is the number of - output units, and $C$ is the number of channels. - logits: The logits parameter of shape $(F, K, C)$, where $K$ is the number of - output units, and $C$ is the number of channels. + probs: The probabilities parameter of shape $(F, K)$, where $K$ is the number of + output units. + logits: The logits parameter of shape $(F, K)$, where $K$ is the number of + output units. semiring: The evaluation semiring. Defaults to [SumProductSemiring][cirkit.backend.torch.semiring.SumProductSemiring]. 
@@ -508,7 +472,6 @@ def __init__( super().__init__( scope_idx, num_output_units, - num_channels=num_channels, semiring=semiring, ) self.total_count = total_count @@ -539,13 +502,12 @@ def _valid_parameter_shape(self, p: TorchParameter) -> bool: @property def _probs_logits_shape(self) -> tuple[int, ...]: - return self.num_output_units, self.num_channels + return (self.num_output_units,) @property def config(self) -> Mapping[str, Any]: return { "num_output_units": self.num_output_units, - "num_channels": self.num_channels, "total_count": self.total_count, } @@ -558,15 +520,14 @@ def params(self) -> Mapping[str, TorchParameter]: def log_unnormalized_likelihood(self, x: Tensor) -> Tensor: if x.is_floating_point(): x = x.long() # The input to Binomial should be discrete - x = x.permute(0, 2, 3, 1) # (F, C, B, 1) -> (F, B, 1, C) if self.logits is not None: - logits = self.logits().unsqueeze(dim=1) # (F, 1, K, C) + logits = self.logits().unsqueeze(dim=1) # (F, 1, K) dist = distributions.Binomial(self.total_count, logits=logits) else: - probs = self.probs().unsqueeze(dim=1) # (F, 1, K, C) + probs = self.probs().unsqueeze(dim=1) # (F, 1, K) dist = distributions.Binomial(self.total_count, probs=probs) - x = dist.log_prob(x) # (F, B, K, C) - return torch.sum(x, dim=3) + x = dist.log_prob(x) # (F, B, K) + return x def log_partition_function(self) -> Tensor: device = self.logits.device if self.logits is not None else self.probs.device @@ -575,8 +536,8 @@ def log_partition_function(self) -> Tensor: def sample(self, num_samples: int = 1) -> Tensor: logits = torch.log(self.probs()) if self.logits is None else self.logits() dist = distributions.Binomial(self.total_count, logits=logits) - samples = dist.sample((num_samples,)) # (num_samples, F, K, C) - samples = samples.permute(1, 3, 2, 0) # (F, C, K, num_samples) + samples = dist.sample((num_samples,)) # (num_samples, F, K) + samples = samples.permute(1, 2, 0) # (F, K, num_samples) return samples @@ -588,7 +549,6 @@ def __init__( 
self, scope_idx: Tensor, num_output_units: int, - num_channels: int = 1, *, mean: TorchParameter, stddev: TorchParameter, @@ -603,12 +563,11 @@ def __init__( Alternatively, a tensor of shape $(D,)$ can be specified, which will be interpreted as a tensor of shape $(1, D)$, i.e., with $F = 1$. num_output_units: The number of output units. - num_channels: The number of channels. - mean: The mean parameter, having shape $(F, K, C)$, where $K$ is the number of - output units and $C$ is the number of channels. - stddev: The standard deviation parameter, having shape $(F, K, C)$, where $K$ is the - number of output units and $C$ is the number of channels. - log_partition: An optional parameter of shape $(F, K, C)$, encoding the log-partition. + mean: The mean parameter, having shape $(F, K)$, where $K$ is the number of + output units. + stddev: The standard deviation parameter, having shape $(F, K)$, where $K$ is the + number of output units. + log_partition: An optional parameter of shape $(F, K)$, encoding the log-partition function. If this is not None, then the Gaussian layer encodes unnormalized Gaussian likelihoods, which are then normalized with the given log-partition function. 
@@ -626,7 +585,6 @@ def __init__( super().__init__( scope_idx, num_output_units, - num_channels=num_channels, semiring=semiring, ) if not self._valid_mean_stddev_shape(mean): @@ -663,15 +621,15 @@ def _valid_log_partition_shape(self, log_partition: TorchParameter) -> bool: @property def _mean_stddev_shape(self) -> tuple[int, ...]: - return self.num_output_units, self.num_channels + return (self.num_output_units,) @property def _log_partition_shape(self) -> tuple[int, ...]: - return self.num_output_units, self.num_channels + return (self.num_output_units,) @property def config(self) -> Mapping[str, Any]: - return {"num_output_units": self.num_output_units, "num_channels": self.num_channels} + return {"num_output_units": self.num_output_units} @property def params(self) -> Mapping[str, TorchParameter]: @@ -681,14 +639,12 @@ def params(self) -> Mapping[str, TorchParameter]: return params def log_unnormalized_likelihood(self, x: Tensor) -> Tensor: - mean = self.mean().unsqueeze(dim=1) # (F, 1, K, C) - stddev = self.stddev().unsqueeze(dim=1) # (F, 1, K, C) - x = x.permute(0, 2, 3, 1) # (F, C, B, 1) -> (F, B, 1, C) - x = distributions.Normal(loc=mean, scale=stddev).log_prob(x) # (F, B, K, C) - x = torch.sum(x, dim=3) # (F, B, K) + mean = self.mean().unsqueeze(dim=1) # (F, 1, K) + stddev = self.stddev().unsqueeze(dim=1) # (F, 1, K) + x = distributions.Normal(loc=mean, scale=stddev).log_prob(x) # (F, B, K) if self.log_partition is not None: - log_partition = self.log_partition() # (F, K, C) - x = x + torch.sum(log_partition, dim=2).unsqueeze(dim=1) + log_partition = self.log_partition() # (F, K) + x = x + log_partition.unsqueeze(dim=1) return x def log_partition_function(self) -> Tensor: @@ -696,13 +652,13 @@ def log_partition_function(self) -> Tensor: return torch.zeros( size=(self.num_folds, 1, self.num_output_units), device=self.mean.device ) - log_partition = self.log_partition() # (F, K, C) - return torch.sum(log_partition, dim=2).unsqueeze(dim=1) + log_partition = 
self.log_partition() # (F, K) + return log_partition.unsqueeze(dim=1) # (F, 1, K) def sample(self, num_samples: int = 1) -> Tensor: dist = distributions.Normal(loc=self.mean(), scale=self.stddev()) - samples = dist.sample((num_samples,)) # (N, F, K, C) - samples = samples.permute(1, 3, 2, 0) # (F, C, K, N) + samples = dist.sample((num_samples,)) # (N, F, K) + samples = samples.permute(1, 2, 0) # (F, K, N) return samples @@ -779,9 +735,8 @@ def __init__( Args: layer: The input layer on which compute the evidence of. observation: The observation, i.e., the input to pass to the given input layer. - It must be a parameter of shape $(F, C, D)$, where $F$ is the number of folds - of the given layer, $D$ is the number variables the given layer is defined on, - and $C$ is the number channels per variable. + It must be a parameter of shape $(F, D)$, where $F$ is the number of folds of the + given layer, and $D$ is the number of variables the given layer is defined on. semiring: The evaluation semiring. Defaults to [SumProductSemiring][cirkit.backend.torch.semiring.SumProductSemiring]. 
@@ -794,21 +749,14 @@ def __init__( f"The number of folds in the observation and in the layer should be the same, " f"but found {observation.num_folds} and {layer.num_folds} respectively" ) - if len(observation.shape) != 2: + if len(observation.shape) != 1: raise ValueError( - f"Expected observation of shape (num_channels, num_variables), " - f"but found {observation.shape}" + f"Expected observation of shape (num_variables,), " f"but found {observation.shape}" ) - num_channels, num_variables = observation.shape - if num_channels != layer.num_channels: - raise ValueError( - f"Expected an observation with number of channels {layer.num_channels}, " - f"but found {num_channels}" - ) - if num_variables != layer.num_variables: + if observation.shape[0] != layer.num_variables: raise ValueError( f"Expected an observation with number of variables {layer.num_variables}, " - f"but found {num_variables}" + f"but found {observation.shape[0]}" ) super().__init__(layer.num_output_units, layer.num_folds, semiring=semiring) self.layer = layer @@ -827,8 +775,8 @@ def sub_modules(self) -> Mapping[str, TorchInputLayer]: return {"layer": self.layer} def forward(self, batch_size: int) -> Tensor: - obs = self.observation() # (F, C, D) - obs = obs.unsqueeze(dim=2) # (F, C, 1, D) + obs = self.observation() # (F, D) + obs = obs.unsqueeze(dim=1) # (F, 1, D) x = self.layer(obs) # (F, 1, K) return x.expand(x.shape[0], batch_size, x.shape[2]) @@ -836,8 +784,8 @@ def sample(self, num_samples: int = 1) -> Tensor: if self.num_variables != 1: raise NotImplementedError("Sampling a multivariate Evidence layer is not implemented") # Sampling an evidence layer translates to return the given observation - obs = self.observation() # (F, C, D=1) - obs = obs.unsqueeze(dim=-1) # (F, C, 1, 1) + obs = self.observation() # (F, D=1) + obs = obs.unsqueeze(dim=-1) # (F, 1, 1) return obs.expand(size=(-1, -1, self.num_output_units, num_samples)) @@ -848,7 +796,6 @@ def __init__( self, scope_idx: Tensor, 
num_output_units: int, - num_channels: int = 1, *, degree: int, coeff: TorchParameter, @@ -862,7 +809,6 @@ def __init__( on. Alternatively, a tensor of shape $(D,)$ can be specified, which will be interpreted as a tensor of shape $(1, D)$, i.e., with $F = 1$. num_output_units: The number of output units. - num_channels: The number of channels. degree: The degree of polynomial. coeff: The coefficient parameter, having shape $(F, K, \mathsf{degree} + 1)$, where $K$ is the number of output units. @@ -874,12 +820,9 @@ def __init__( num_variables = scope_idx.shape[-1] if num_variables != 1: raise ValueError("The Polynomial layer encodes a univariate distribution") - if num_channels != 1: - raise ValueError("The Polynomial layer encodes a univariate distribution") super().__init__( scope_idx, num_output_units, - num_channels=num_channels, semiring=semiring, ) self.degree = degree @@ -926,7 +869,6 @@ def _polyval(coeff: Tensor, x: Tensor) -> Tensor: def config(self) -> Mapping[str, Any]: return { "num_output_units": self.num_output_units, - "num_channels": self.num_channels, "degree": self.degree, } diff --git a/cirkit/backend/torch/layers/optimized.py b/cirkit/backend/torch/layers/optimized.py index fe866d0f..eeefd122 100644 --- a/cirkit/backend/torch/layers/optimized.py +++ b/cirkit/backend/torch/layers/optimized.py @@ -180,22 +180,20 @@ def sample(self, x: Tensor) -> tuple[Tensor, Tensor]: if not normalized: raise ValueError("Sampling only works with a normalized parametrization") - # x: (F, H, C, K, num_samples, D) - x = torch.sum(x, dim=1, keepdim=True) # (F, H=1, C, K, num_samples, D) + # x: (F, H, K, num_samples, D) + x = torch.sum(x, dim=1) # (F, K, num_samples, D) - c = x.shape[2] - d = x.shape[-1] - num_samples = x.shape[-2] + num_samples = x.shape[2] + d = x.shape[3] # mixing_distribution: (F, O, K) mixing_distribution = torch.distributions.Categorical(probs=weight) mixing_samples = mixing_distribution.sample((num_samples,)) - mixing_samples = 
E.rearrange(mixing_samples, "n f o -> f o n") - mixing_indices = E.repeat(mixing_samples, "f o n -> f a c o n d", a=1, c=c, d=d) + mixing_samples = E.rearrange(mixing_samples, "n f k -> f k n") + mixing_indices = E.repeat(mixing_samples, "f k n -> f k n d", d=d) - x = torch.gather(x, dim=-3, index=mixing_indices) - x = x[:, 0] + x = torch.gather(x, dim=1, index=mixing_indices) return x, mixing_samples diff --git a/cirkit/backend/torch/parameters/nodes.py b/cirkit/backend/torch/parameters/nodes.py index e69b7de0..dd68ab11 100644 --- a/cirkit/backend/torch/parameters/nodes.py +++ b/cirkit/backend/torch/parameters/nodes.py @@ -813,18 +813,13 @@ def __init__( ) -> None: assert in_mean1_shape == in_stddev1_shape assert in_mean2_shape == in_stddev2_shape - assert in_mean1_shape[1] == in_mean2_shape[1] - assert in_stddev1_shape[1] == in_stddev2_shape[1] super().__init__( in_mean1_shape, in_stddev1_shape, in_mean2_shape, in_stddev2_shape, num_folds=num_folds ) @property def shape(self) -> tuple[int, ...]: - return ( - self.in_shapes[0][0] * self.in_shapes[2][0], - self.in_shapes[0][1], - ) + return (self.in_shapes[0][0] * self.in_shapes[2][0],) @property def config(self) -> dict[str, Any]: @@ -855,15 +850,11 @@ def __init__( *, num_folds: int = 1, ) -> None: - assert in_stddev1_shape[1] == in_stddev2_shape[1] super().__init__(in_stddev1_shape, in_stddev2_shape, num_folds=num_folds) @property def shape(self) -> tuple[int, ...]: - return ( - self.in_shapes[0][0] * self.in_shapes[1][0], - self.in_shapes[0][1], - ) + return (self.in_shapes[0][0] * self.in_shapes[1][0],) @property def config(self) -> dict[str, Any]: @@ -890,8 +881,6 @@ def __init__( ) -> None: assert in_mean1_shape == in_stddev1_shape assert in_mean2_shape == in_stddev2_shape - assert in_mean1_shape[1] == in_mean2_shape[1] - assert in_stddev1_shape[1] == in_stddev2_shape[1] super().__init__( in_mean1_shape, in_stddev1_shape, in_mean2_shape, in_stddev2_shape, num_folds=num_folds ) @@ -899,10 +888,7 @@ def 
__init__( @property def shape(self) -> tuple[int, ...]: - return ( - self.in_shapes[0][0] * self.in_shapes[2][0], - self.in_shapes[0][1], - ) + return (self.in_shapes[0][0] * self.in_shapes[2][0],) @property def config(self) -> dict[str, Any]: diff --git a/cirkit/backend/torch/parameters/optimized.py b/cirkit/backend/torch/parameters/optimized.py index ccf1c541..52586570 100644 --- a/cirkit/backend/torch/parameters/optimized.py +++ b/cirkit/backend/torch/parameters/optimized.py @@ -2,7 +2,6 @@ from typing import Any import torch -from einops import einsum from torch import Tensor from cirkit.backend.torch.parameters.nodes import TorchParameterOp diff --git a/cirkit/backend/torch/parameters/pic.py b/cirkit/backend/torch/parameters/pic.py index a3c95611..d4cd37ee 100644 --- a/cirkit/backend/torch/parameters/pic.py +++ b/cirkit/backend/torch/parameters/pic.py @@ -79,7 +79,6 @@ def __init__( self, num_variables: int, num_param: int, - num_channels: bool | None = 1, net_dim: int | None = 64, bias: bool | None = False, sharing: str | None = "none", @@ -94,7 +93,6 @@ def __init__( assert sharing in ["none", "f", "c"] self.num_variables = num_variables self.num_param = num_param - self.num_channels = num_channels self.sharing = sharing self.tensor_parameter = tensor_parameter self.reparam = reparam @@ -102,8 +100,8 @@ def __init__( self.register_buffer("z_quad", z_quad) ff_dim = net_dim if ff_dim is None else ff_dim - inner_conv_groups = num_channels * (1 if sharing in ["f", "c"] else num_variables) - last_conv_groups = num_channels * (1 if sharing == "f" else num_variables) + inner_conv_groups = 1 if sharing in ["f", "c"] else num_variables + last_conv_groups = 1 if sharing == "f" else num_variables self.net = nn.Sequential( FourierLayer(1, ff_dim, sigma=ff_sigma, learnable=learn_ff), nn.Conv1d( @@ -126,12 +124,10 @@ def __init__( # initialize all heads to be equal when using composite sharing if sharing == "c": self.net[-1].weight.data = ( - self.net[-1].weight.data[: 
num_param * num_channels].repeat(num_variables, 1, 1) + self.net[-1].weight.data[:num_param].repeat(num_variables, 1, 1) ) if self.net[-1].bias is not None: - self.net[-1].bias.data = ( - self.net[-1].bias.data[: num_param * num_channels].repeat(num_variables) - ) + self.net[-1].bias.data = self.net[-1].bias.data[:num_param].repeat(num_variables) if tensor_parameter is not None and z_quad is not None: with torch.no_grad(): @@ -141,17 +137,13 @@ def forward(self, z_quad: torch.Tensor | None = None, n_chunks: int | None = 1): z_quad = self.z_quad if z_quad is None else z_quad assert z_quad.ndim == 1 self.net[1].groups = 1 - self.net[-1].groups = self.num_channels * ( - 1 if self.sharing in ["f", "c"] else self.num_variables - ) + self.net[-1].groups = 1 if self.sharing in ["f", "c"] else self.num_variables param = torch.cat( [self.net(chunk.unsqueeze(1)) for chunk in z_quad.chunk(n_chunks, dim=0)], dim=1 ) if self.sharing == "f": param = param.unsqueeze(0).expand(self.num_variables, -1, -1) - param = param.view( - self.num_variables, self.num_param * self.num_channels, len(z_quad) - ).transpose(1, 2) + param = param.view(self.num_variables, self.num_param, len(z_quad)).transpose(1, 2) if self.tensor_parameter is not None: param = param.view_as(self.tensor_parameter._ptensor) self.tensor_parameter._ptensor = param @@ -294,7 +286,8 @@ def param_to_buffer(model: torch.nn.Module): """Turns all parameters of a module into buffers.""" modules = model.modules() module = next(modules) - for name, param in module.named_parameters(recurse=False): + named_parameters = list(module.named_parameters(recurse=False)) + for name, param in named_parameters: delattr(module, name) # Unregister parameter module.register_buffer(name, param.data) for module in modules: @@ -317,7 +310,6 @@ def param_to_buffer(model: torch.nn.Module): input_net = PICInputNet( num_variables=node.num_variables * node.num_folds, num_param=node.num_categories, - num_channels=node.num_channels, net_dim=net_dim, 
bias=bias, sharing=input_sharing, @@ -340,7 +332,6 @@ def param_to_buffer(model: torch.nn.Module): node.mean = PICInputNet( num_variables=node.num_variables * node.num_folds, num_param=1, - num_channels=node.num_channels, net_dim=net_dim, bias=bias, sharing=input_sharing, @@ -354,7 +345,6 @@ def param_to_buffer(model: torch.nn.Module): node.stddev = PICInputNet( num_variables=node.num_variables * node.num_folds, num_param=1, - num_channels=node.num_channels, net_dim=net_dim, bias=bias, sharing=input_sharing, diff --git a/cirkit/backend/torch/queries.py b/cirkit/backend/torch/queries.py index 2de74047..fd30f8b9 100644 --- a/cirkit/backend/torch/queries.py +++ b/cirkit/backend/torch/queries.py @@ -50,8 +50,8 @@ def __call__(self, x: Tensor, *, integrate_vars: Tensor | Scope | Iterable[Scope """Solve an integration query, given an input batch and the variables to integrate. Args: - x: An input batch of shape $(B, C, D)$, where $B$ is the batch size, $C$ is the number - of channels per variable, and $D$ is the number of variables. + x: An input batch of shape $(B, D)$, where $B$ is the batch size, + and $D$ is the number of variables. integrate_vars: The variables to integrate. It must be a subset of the variables on which the circuit given in the constructor is defined on. The format can be one of the following three: @@ -221,7 +221,7 @@ def __call__(self, num_samples: int = 1) -> tuple[Tensor, list[Tensor]]: A pair (samples, mixture_samples), consisting of (i) an assignment to the observed variables the circuit is defined on, and (ii) the samples of the finitely-discrete latent variables associated to the sum units. The samples (i) are returned as a - tensor of shape (num_samples, num_channels, num_variables). + tensor of shape (num_samples, num_variables). Raises: ValueError: if the number of samples is not a positive number. 
@@ -230,7 +230,7 @@ def __call__(self, num_samples: int = 1) -> tuple[Tensor, list[Tensor]]: raise ValueError("The number of samples must be a positive number") mixture_samples: list[Tensor] = [] - # samples: (O, C, K, num_samples, D) + # samples: (O, K, num_samples, D) samples = self._circuit.evaluate( module_fn=functools.partial( self._layer_fn, @@ -238,10 +238,10 @@ def __call__(self, num_samples: int = 1) -> tuple[Tensor, list[Tensor]]: mixture_samples=mixture_samples, ), ) - # samples: (num_samples, O, K, C, D) - samples = samples.permute(3, 0, 2, 1, 4) + # samples: (num_samples, O, K, D) + samples = samples.permute(2, 0, 1, 3) # TODO: fix for the case of multi-output circuits, i.e., O != 1 or K != 1 - samples = samples[:, 0, 0] # (num_samples, C, D) + samples = samples[:, 0, 0] # (num_samples, D) return samples, mixture_samples def _layer_fn( @@ -269,10 +269,10 @@ def _pad_samples(self, samples: Tensor, scope_idx: Tensor) -> Tensor: if scope_idx.shape[1] != 1: raise NotImplementedError("Padding is only implemented for univariate samples") - # padded_samples: (F, C, K, num_samples, D) + # padded_samples: (F, K, num_samples, D) padded_samples = torch.zeros( (*samples.shape, len(self._circuit.scope)), device=samples.device, dtype=samples.dtype ) fold_idx = torch.arange(samples.shape[0], device=samples.device) - padded_samples[fold_idx, :, :, :, scope_idx.squeeze(dim=1)] = samples + padded_samples[fold_idx, :, :, scope_idx.squeeze(dim=1)] = samples return padded_samples diff --git a/cirkit/backend/torch/rules/layers.py b/cirkit/backend/torch/rules/layers.py index 8494d71c..762d717d 100644 --- a/cirkit/backend/torch/rules/layers.py +++ b/cirkit/backend/torch/rules/layers.py @@ -36,7 +36,6 @@ def compile_embedding_layer(compiler: "TorchCompiler", sl: EmbeddingLayer) -> To return TorchEmbeddingLayer( torch.tensor(tuple(sl.scope)), sl.num_output_units, - num_channels=sl.num_channels, num_states=sl.num_states, weight=weight, semiring=compiler.semiring, @@ -55,7 +54,6 
@@ def compile_categorical_layer( return TorchCategoricalLayer( torch.tensor(tuple(sl.scope)), sl.num_output_units, - num_channels=sl.num_channels, num_categories=sl.num_categories, probs=probs, logits=logits, @@ -73,7 +71,6 @@ def compile_binomial_layer(compiler: "TorchCompiler", sl: BinomialLayer) -> Torc return TorchBinomialLayer( torch.tensor(tuple(sl.scope)), sl.num_output_units, - num_channels=sl.num_channels, total_count=sl.total_count, probs=probs, logits=logits, @@ -91,7 +88,6 @@ def compile_gaussian_layer(compiler: "TorchCompiler", sl: GaussianLayer) -> Torc return TorchGaussianLayer( torch.tensor(tuple(sl.scope)), sl.num_output_units, - num_channels=sl.num_channels, mean=mean, stddev=stddev, log_partition=log_partition, @@ -106,7 +102,6 @@ def compile_polynomial_layer( return TorchPolynomialLayer( torch.tensor(tuple(sl.scope)), sl.num_output_units, - num_channels=sl.num_channels, degree=sl.degree, coeff=coeff, semiring=compiler.semiring, diff --git a/cirkit/symbolic/circuit.py b/cirkit/symbolic/circuit.py index ddc1dbfb..5877849a 100644 --- a/cirkit/symbolic/circuit.py +++ b/cirkit/symbolic/circuit.py @@ -226,7 +226,6 @@ class Circuit(DiAcyclicGraph[Layer]): def __init__( self, - num_channels: int, layers: Sequence[Layer], in_layers: Mapping[Layer, Sequence[Layer]], outputs: Sequence[Layer], @@ -236,14 +235,12 @@ def __init__( """Initializes a symbolic circuit. Args: - num_channels: The number of channels for each variable. layers: The list of symbolic layers. in_layers: A dictionary containing the list of inputs to each layer. outputs: The output layers of the circuit. operation: The optional operation the circuit has been obtained through. """ super().__init__(layers, in_layers, outputs) - self.num_channels = num_channels self.operation = operation # Build scopes bottom-up, and check the consistency of the layers, w.r.t. 
@@ -279,7 +276,7 @@ def num_variables(self) -> int: Returns: int: """ - return len(self.scope) * self.num_channels + return len(self.scope) def layer_scope(self, sl: Layer) -> Scope: """Retrieves the scope of a layer. @@ -378,7 +375,7 @@ def subgraph(self, *outputs: Layer) -> "Circuit": The sub-circuit having the given layers as outputs. """ layers, in_layers = subgraph(outputs, self.layer_inputs) - return Circuit(self.num_channels, layers, in_layers, outputs=outputs) + return Circuit(layers, in_layers, outputs=outputs) ##################################### Structural properties #################################### @@ -455,7 +452,6 @@ def properties(self) -> StructuralProperties: @classmethod def from_operation( cls, - num_channels: int, blocks: list[CircuitBlock], in_blocks: dict[CircuitBlock, Sequence[CircuitBlock]], output_blocks: list[CircuitBlock], @@ -465,7 +461,6 @@ def from_operation( """Constructs a circuit that resulted from an operation over other circuits. Args: - num_channels: The number of channels per variable. blocks: The list of circuit blocks. in_blocks: A dictionary containing the list of block inputs to each circuit block. output_blocks: The outputs blocks of the circuit. @@ -497,7 +492,7 @@ def from_operation( for sl in b.layers: in_layers[sl].extend(b.layer_inputs(sl)) # Build the circuit and set the operation - return cls(num_channels, layers, in_layers, outputs, operation=operation) + return cls(layers, in_layers, outputs, operation=operation) def are_compatible(sc1: Circuit, sc2: Circuit) -> bool: diff --git a/cirkit/symbolic/functional.py b/cirkit/symbolic/functional.py index 83022f7f..943190ed 100644 --- a/cirkit/symbolic/functional.py +++ b/cirkit/symbolic/functional.py @@ -40,20 +40,7 @@ def concatenate(scs: Sequence[Circuit], *, registry: OperatorRegistry | None = N Returns: A circuit obtained by concatenating circuits. - - Raises: - ValueError: If the given circuits to concatenate have different number of channels per - variable. 
""" - # Retrieve the number of channels - num_channels_s = {sc.num_channels for sc in scs} - if len(num_channels_s) != 1: - raise ValueError( - f"Only circuits with the same number of channels can be concatenated, " - f"but found a set of number of channels {num_channels_s}" - ) - num_channels = scs[0].num_channels - # Mapping the symbolic circuit layers with blocks of circuit layers layers_to_block: dict[Layer, CircuitBlock] = {} @@ -74,7 +61,6 @@ def concatenate(scs: Sequence[Circuit], *, registry: OperatorRegistry | None = N # Construct the symbolic circuit obtained by merging multiple circuits return Circuit.from_operation( - num_channels, blocks, in_blocks, output_blocks, @@ -94,9 +80,7 @@ def evidence( Args: sc: The symbolic circuit where some variables have to be observed. obs: The observation data, stored as a dictionary mapping variable integer identifiers - to numbers, i.e., either integer, float or complex values. In the case the - circuit defines multiple channels per variable, then this is a dictionary mapping - variable integer identifiers to tuples of as many numbers as the number of channels. + to numbers, i.e., either integer, float or complex values. registry: A registry of symbolic layer operators. If it is None, then the one in the current context will be used. See the [OPERATOR_REGISTRY][cirkit.symbolic.registry.OPERATOR_REGISTRY] context variable @@ -109,16 +93,6 @@ def evidence( ValueError: If the observation contains variables not defined in the scope of the circuit. NotImplementedError: If the evidence of a multivariate input layer needs to be constructed. 
""" - if not all( - (isinstance(value, Number) or len(value) == 1) - if sc.num_channels == 1 - else len(value) == sc.num_channels - for (var, value) in obs.items() - ): - raise ValueError( - "The observation of each variable should contain as many " - "values as the number of channels" - ) # Check the variables to observe scope = Scope(obs.keys()) if not scope: @@ -144,15 +118,11 @@ def evidence( # Build the observation parameter, as a constant tensor that # contains assignments to the variables being observed - # The shape of the observation parameter is (C, D), where C is the - # number of channels and D is the number of variables the layer - # depends on - obs_shape = sc.num_channels, len(sl.scope) - # obs_ndarray: An array of shape either (D,) or (D, C) + # The shape of the observation parameter is (D,), where D + # is the number of variables the layer depends on obs_ndarray = np.array([obs[var] for var in sorted(sl.scope)]) - obs_ndarray = obs_ndarray[None, :] if len(obs_ndarray.shape) == 1 else obs_ndarray.T - # A constant parameter of shape (C, D), where C can be 1. 
- obs_parameter = ConstantParameter(*obs_shape, value=obs_ndarray) + # A constant parameter of shape (D,) + obs_parameter = ConstantParameter(len(sl.scope), value=obs_ndarray) # Build the evidence layer, with a reference to the input layer evi_sl = EvidenceLayer(sl.copyref(), observation=Parameter.from_input(obs_parameter)) @@ -173,7 +143,6 @@ def evidence( # Construct the evidence symbolic circuit and set the evidence operation metadata return Circuit.from_operation( - sc.num_channels, blocks, in_blocks, output_blocks, @@ -272,7 +241,6 @@ def integrate( # Construct the integral symbolic circuit and set the integration operation metadata return Circuit.from_operation( - sc.num_channels, blocks, in_blocks, output_blocks, @@ -433,7 +401,6 @@ def multiply(sc1: Circuit, sc2: Circuit, *, registry: OperatorRegistry | None = # Construct the product symbolic circuit return Circuit.from_operation( - sc1.num_channels, blocks, in_blocks, output_blocks, @@ -452,26 +419,6 @@ class _ScopeVarAndBlockAndInputs(NamedTuple): diff_in_blocks: list[CircuitBlock] # The inputs to the layer of diff_block. -_T = TypeVar("_T") # TODO: for _repeat. move together - - -# TODO: this can be made public and moved to utils, might be used elsewhere. -def _repeat(iterable: Iterable[_T], /, *, times: int) -> Iterable[_T]: - """Repeat each element of the given iterable by given times. - - The elements are generated lazily. The iterable passed in will be iterated once. - This function differs from itertools in that it repeats an interable instead of only one elem. - - Args: - iterable (Iterable[_T]): The iterable to generate the original elements. - times (int): The times to repeat each element. - - Returns: - Iterable[_T]: The iterable with repeated elements. 
- """ - return itertools.chain.from_iterable(itertools.repeat(elem, times=times) for elem in iterable) - - def differentiate( sc: Circuit, order: int = 1, *, registry: OperatorRegistry | None = None ) -> Circuit: @@ -515,17 +462,14 @@ def differentiate( in_blocks: dict[CircuitBlock, Sequence[CircuitBlock]] = {} for sl in sc.topological_ordering(): - # "diff_blocks: List[CircuitBlock]" is the diff of sl wrt each variable and channel in order + # "diff_blocks: List[CircuitBlock]" is the diff of sl wrt each variable in order # and then at the end we append a copy of sl if isinstance(sl, InputLayer): # TODO: no type hint for func, also cannot quick jump in static analysis func = registry.retrieve_rule(LayerOperator.DIFFERENTIATION, type(sl)) diff_blocks = [ - func(sl, var_idx=var_idx, ch_idx=ch_idx, order=order) - for var_idx, ch_idx in itertools.product( - range(len(sl.scope)), range(sc.num_channels) - ) + func(sl, var_idx=var_idx, order=order) for var_idx in range(len(sl.scope)) ] elif isinstance(sl, SumLayer): @@ -558,11 +502,11 @@ def differentiate( # Each item is a list of length (num_vars * num_chs) of that input, corresponding to the # diff wrt each var and ch of that input. all_scope_var_diff_block = ( - # Each list is all the diffs of sl wrt each var and each channel in the scope of + # Each list is all the diffs of sl wrt each var in the scope of # the cur_layer in the input of sl. [ # Each named-tuple is a diff of sl and its inputs, where the diff is wrt the - # current variable and channel as in the double loop. + # current variable as in the double loop. _ScopeVarAndBlockAndInputs( # Label the named-tuple as the var id in the whole scope, for sorting. scope_var=scope_var, @@ -577,10 +521,9 @@ def differentiate( ) # Loop over the (num_vars * num_chs) diffs of cur_layer, while also providing # the corresponding scope_var which the current diff is wrt. - # We need the scope_var to label and sort the diff layers of sl. 
We do nnt need - # channel ids because they are always saved densely in order. + # We need the scope_var to label and sort the diff layers of sl. for scope_var, diff_cur_layer in zip( - _repeat(sc.layer_scope(cur_layer), times=sc.num_channels), + sc.layer_scope(cur_layer), layers_to_blocks[cur_layer][:-1], ) ] @@ -630,7 +573,6 @@ def differentiate( # Construct the integral symbolic circuit and set the integration operation metadata return Circuit.from_operation( - sc.num_channels, sum(layers_to_blocks.values(), []), in_blocks, sum((layers_to_blocks[sl] for sl in sc.outputs), []), @@ -695,7 +637,6 @@ def conjugate( # Construct the conjugate symbolic circuit return Circuit.from_operation( - sc.num_channels, blocks, in_blocks, output_blocks, diff --git a/cirkit/symbolic/layers.py b/cirkit/symbolic/layers.py index c6ecd979..164fa7a8 100644 --- a/cirkit/symbolic/layers.py +++ b/cirkit/symbolic/layers.py @@ -116,22 +116,19 @@ def __repr__(self) -> str: class InputLayer(Layer, ABC): """The symbolic input layer class.""" - def __init__(self, scope: Scope, num_output_units: int, num_channels: int = 1): + def __init__(self, scope: Scope, num_output_units: int): """Initializes a symbolic input layer. Args: scope: The variables scope of the layer. num_output_units: The number of input units in the layer. - num_channels: The number of channels for each variable in the scope. Raises: - ValueError: If the number of outputs or the number of channels are not positive. + ValueError: If the number of outputs is not positive. 
""" if num_output_units <= 0: raise ValueError("The number of output units should be positive") - if num_channels <= 0: - raise ValueError("The number of channels should be positive") - super().__init__(len(scope), num_output_units, num_channels) + super().__init__(len(scope), num_output_units) self.scope = scope @property @@ -143,22 +140,12 @@ def num_variables(self) -> int: """ return self.num_input_units - @property - def num_channels(self) -> int: - """The number of channels per variable modelled by the input layer. - - Returns: - int: The number of channels per variable. - """ - return self.arity - def __repr__(self) -> str: config_repr = ", ".join(f"{k}={v}" for k, v in self.config.items()) params_repr = ", ".join(f"{k}={v}" for k, v in self.params.items()) return ( f"{self.__class__.__name__}(" f"scope={self.scope}, " - f"num_channels={self.arity}, " f"num_output_units={self.num_output_units}, " f"config=({config_repr})" f"params=({params_repr})" @@ -188,29 +175,20 @@ def __init__(self, layer: InputLayer, *, observation: Parameter): Args: layer: The symbolic input layer to condition, i.e., to evaluate on the observation. observation: The observation stored as a parameter that outputs a constant (i.e., - non-learnable) tensor of shape $(C, D)$, where $D$ is the number of variable the - symbolic input layer is defined on, and $C$ is the number of channels per variable. + non-learnable) tensor of shape $(D,)$, where $D$ is the number of variables the + symbolic input layer is defined on. Raises: - ValueError: If the observation parameter shape has not two dimensions, or if the - number of its channels (resp. variables) does not match the number of channels - (resp. variables) of the symbolic input layer. + ValueError: If the observation parameter shape is not one-dimensional.
""" - if len(observation.shape) != 2: - raise ValueError( - f"Expected observation of shape (num_channels, num_variables), " - f"but found {observation.shape}" - ) - num_channels, num_variables = observation.shape - if num_channels != layer.num_channels: + if len(observation.shape) != 1: raise ValueError( - f"Expected an observation with number of channels {layer.num_channels}, " - f"but found {num_channels}" + f"Expected observation of shape (num_variables,), " f"but found {observation.shape}" ) - if num_variables != layer.num_variables: + if observation.shape[0] != layer.num_variables: raise ValueError( f"Expected an observation with number of variables {layer.num_variables}, " - f"but found {num_variables}" + f"but found {observation.shape[0]}" ) super().__init__(layer.num_output_units) self.layer = layer @@ -235,7 +213,6 @@ def __init__( self, scope: Scope, num_output_units: int, - num_channels: int, *, num_states: int = 2, weight: Parameter | None = None, @@ -246,12 +223,10 @@ def __init__( Args: scope: The variables scope the layer depends on. num_output_units: The number of Categorical units in the layer. - num_channels: The number of channels per variable. - num_states: The number of categories for each variable and channel. - weight: The weight parameter of shape $(K, C, N)$, where $K$ is the number of output - units, $C$ is the number of channels, and $N$ is the number of states. If it is - None, then either the weight factory is used (if it is not None) or a - weight parameter is initialized. + num_states: The number of categories for each variable. + weight: The weight parameter of shape $(K, N)$, where $K$ is the number of output + units, and $N$ is the number of states. If it is None, then either the weight + factory is used (if it is not None) or a weight parameter is initialized. 
weight_factory: A factory used to construct the weight parameter, if it is not given """ @@ -259,7 +234,7 @@ def __init__( raise ValueError("The Embedding layer encodes univariate functions") if num_states <= 1: raise ValueError("The number of states must be at least 2") - super().__init__(scope, num_output_units, num_channels) + super().__init__(scope, num_output_units) self.num_states = num_states if weight is None: if weight_factory is None: @@ -276,7 +251,6 @@ def __init__( def _weight_shape(self) -> tuple[int, ...]: return ( self.num_output_units, - self.num_channels, self.num_states, ) @@ -285,7 +259,6 @@ def config(self) -> Mapping[str, Any]: return { "scope": self.scope, "num_output_units": self.num_output_units, - "num_channels": self.num_channels, "num_states": self.num_states, } @@ -303,7 +276,6 @@ def __init__( self, scope: Scope, num_output_units: int, - num_channels: int = 1, *, num_categories: int, logits: Parameter | None = None, @@ -316,14 +288,12 @@ def __init__( Args: scope: The variables scope the layer depends on. num_output_units: The number of Categorical units in the layer. - num_channels: The number of channels per variable. - num_categories: The number of categories for each variable and channel. - logits: The logits parameter of shape $(K, C, N)$, where $K$ is the number of output - units, $C$ is the number of channels, and $N$ is the number of categories. If it is - None, then either the probabilities parameter is used (if it is not None) or a - probabilities parameter parameterized by a - [SoftmaxParameter][cirkit.symbolic.parameters.SoftmaxParameter]. - probs: The probabilities parameter of shape $(K, C, N)$ (see logits parameter + num_categories: The number of categories for each variable. + logits: The logits parameter of shape $(K, N)$, where $K$ is the number of output + units, and $N$ is the number of categories. 
If it is None, then either the + probabilities parameter is used (if it is not None) or a probabilities parameter + parameterized by a [SoftmaxParameter][cirkit.symbolic.parameters.SoftmaxParameter]. + probs: The probabilities parameter of shape $(K, N)$ (see logits parameter description). If it is None, then the logits parameter must be specified. logits_factory: A factory used to construct the logits parameter, if neither logits nor probabilities are given. @@ -340,7 +310,7 @@ def __init__( ) if num_categories < 2: raise ValueError("At least two categories must be specified") - super().__init__(scope, num_output_units, num_channels) + super().__init__(scope, num_output_units) self.num_categories = num_categories if logits is None and probs is None: if logits_factory is not None: @@ -365,14 +335,13 @@ def __init__( @property def _probs_logits_shape(self) -> tuple[int, ...]: - return self.num_output_units, self.num_channels, self.num_categories + return self.num_output_units, self.num_categories @property def config(self) -> Mapping[str, Any]: return { "scope": self.scope, "num_output_units": self.num_output_units, - "num_channels": self.num_channels, "num_categories": self.num_categories, } @@ -392,7 +361,6 @@ def __init__( self, scope: Scope, num_output_units: int, - num_channels: int = 1, *, total_count: int = 2, logits: Parameter | None = None, @@ -405,14 +373,12 @@ def __init__( Args: scope: The variables scope the layer depends on. num_output_units: The number of Categorical units in the layer. - num_channels: The number of channels per variable. - total_count: The number of total counts for each variable and channel. - logits: The logits parameter of shape $(K, C)$, where $K$ is the number of output - units, $C$ is the number of channels. If it is None, - then either the probabilities parameter is used (if it is not None) or a - probabilities parameter parameterized by a + total_count: The number of total counts for each variable. 
+ logits: The logits parameter of shape $(K,)$, where $K$ is the number of output + units. If it is None, then either the probabilities parameter is used + (if it is not None) or a probabilities parameter parameterized by a [SigmoidParameter][cirkit.symbolic.parameters.SigmoidParameter]. - probs: The probabilities parameter of shape $(K, C)$ (see logits parameter + probs: The probabilities parameter of shape $(K,)$ (see logits parameter description). If it is None, then the logits parameter must be specified. logits_factory: A factory used to construct the logits parameter, if neither logits nor probabilities are given. @@ -427,7 +393,7 @@ def __init__( ) if total_count < 0: raise ValueError("The number of trials should be non-negative") - super().__init__(scope, num_output_units, num_channels) + super().__init__(scope, num_output_units) self.total_count = total_count if logits is None and probs is None: if logits_factory is not None: @@ -452,14 +418,13 @@ def __init__( @property def _probs_logits_shape(self) -> tuple[int, ...]: - return self.num_output_units, self.num_channels + return (self.num_output_units,) @property def config(self) -> dict: return { "scope": self.scope, "num_output_units": self.num_output_units, - "num_channels": self.num_channels, "total_count": self.total_count, } @@ -479,7 +444,6 @@ def __init__( self, scope: Scope, num_output_units: int, - num_channels: int, *, mean: Parameter | None = None, stddev: Parameter | None = None, @@ -492,25 +456,24 @@ def __init__( Args: scope: The variables scope the layer depends on. num_output_units: The number of Gaussian units in the layer. - num_channels: The number of channels per variable. - mean: The mean parameter of shape $(K, C)$, where $K$ is the number of output units, and - $C$ is the number of channels. If it is None, then a default symbolic parameter will - be instantiated with a + mean: The mean parameter of shape $(K)$, where $K$ is the number of output units. 
+ If it is None, then a default symbolic parameter will be instantiated with a [NormalInitializer][cirkit.symbolic.initializers.NormalInitializer] as symbolic initializer. - stddev: The standard deviation parameter of shape $(K, C)$, where $K$ is the number of - output units, and $C$ is the number of channels. If it is None, then a default - symbolic parameter will be instantiated with a - [NormalInitializer][cirkit.symbolic.initializers.NormalInitializer] as + stddev: The standard deviation parameter of shape $(K)$, where $K$ is the number of + output units. If it is None, then a default symbolic parameter will be instantiated + with a [NormalInitializer][cirkit.symbolic.initializers.NormalInitializer] as symbolic initializer, which is then re-parameterized to be positve using a [ScaledSigmoidParameter][cirkit.symbolic.parameters.ScaledSigmoidParameter]. - mean: A factory used to construct the mean parameter, if it is not specified. - stddev: A factory used to construct the standard deviation parameter, if it is not - specified. + log_partition: The log-partition parameter of the Gaussian, of shape $(K,)$. + If the Gaussian is a normalized Gaussian, then this should be None. + mean_factory: A factory used to construct the mean parameter, if it is not specified. + stddev_factory: A factory used to construct the standard deviation parameter, if it is + not specified. 
""" if len(scope) != 1: raise ValueError("The Gaussian layer encodes a univariate distribution") - super().__init__(scope, num_output_units, num_channels) + super().__init__(scope, num_output_units) if mean is None: if mean_factory is None: mean = Parameter.from_input( @@ -544,19 +507,15 @@ def __init__( @property def _mean_stddev_shape(self) -> tuple[int, ...]: - return self.num_output_units, self.num_channels + return (self.num_output_units,) @property def _log_partition_shape(self) -> tuple[int, ...]: - return self.num_output_units, self.num_channels + return (self.num_output_units,) @property def config(self) -> Mapping[str, Any]: - return { - "scope": self.scope, - "num_output_units": self.num_output_units, - "num_channels": self.num_channels, - } + return {"scope": self.scope, "num_output_units": self.num_output_units} @property def params(self) -> Mapping[str, Parameter]: @@ -573,7 +532,6 @@ def __init__( self, scope: Scope, num_output_units: int, - num_channels: int, *, degree: int, coeff: Parameter | None = None, @@ -584,7 +542,6 @@ def __init__( Args: scope: The variables scope the layer depends on. num_output_units: The number of units each encoding a polynomial in the layer. - num_channels: The number of channels per variable. degree: The degree of the polynomials. coeff: The coefficient parameter of shape $(K, \mathsf{degree} + 1)$, where $K$ is the number of output units. 
If it is None, then either the coefficient factory @@ -596,7 +553,7 @@ def __init__( """ if len(scope) != 1: raise ValueError("The Polynomial layer encodes univariate functions") - super().__init__(scope, num_output_units, num_channels) + super().__init__(scope, num_output_units) self.degree = degree if coeff is None: if coeff_factory is None: @@ -618,7 +575,6 @@ def config(self) -> Mapping[str, Any]: return { "scope": self.scope, "num_output_units": self.num_output_units, - "num_channels": self.num_channels, "degree": self.degree, } diff --git a/cirkit/symbolic/operators.py b/cirkit/symbolic/operators.py index d965622b..b9e4e446 100644 --- a/cirkit/symbolic/operators.py +++ b/cirkit/symbolic/operators.py @@ -39,9 +39,8 @@ def integrate_embedding_layer(sl: EmbeddingLayer, *, scope: Scope) -> CircuitBlo f"The scope of the Embedding layer '{sl.scope}'" f" is expected to be a subset of the integration scope '{scope}'" ) - reduce_sum = ReduceSumParameter(sl.weight.shape, axis=2) - reduce_prod = ReduceProductParameter(reduce_sum.shape, axis=1) - value = Parameter.from_sequence(sl.weight.ref(), reduce_sum, reduce_prod) + reduce_sum = ReduceSumParameter(sl.weight.shape, axis=1) + value = Parameter.from_unary(reduce_sum, sl.weight.ref()) sl = ConstantValueLayer(sl.num_output_units, log_space=False, value=value) return CircuitBlock.from_layer(sl) @@ -55,9 +54,8 @@ def integrate_categorical_layer(sl: CategoricalLayer, *, scope: Scope) -> Circui if sl.logits is None: log_partition = Parameter.from_input(ConstantParameter(sl.num_output_units, value=0.0)) else: - reduce_lse = ReduceLSEParameter(sl.logits.shape, axis=2) - reduce_channels = ReduceSumParameter(reduce_lse.shape, axis=1) - log_partition = Parameter.from_sequence(sl.logits.ref(), reduce_lse, reduce_channels) + reduce_lse = ReduceLSEParameter(sl.logits.shape, axis=1) + log_partition = Parameter.from_unary(reduce_lse, sl.logits.ref()) sl = ConstantValueLayer(sl.num_output_units, log_space=True, value=log_partition) 
return CircuitBlock.from_layer(sl) @@ -71,8 +69,7 @@ def integrate_gaussian_layer(sl: GaussianLayer, *, scope: Scope) -> CircuitBlock if sl.log_partition is None: log_partition = Parameter.from_input(ConstantParameter(sl.num_output_units, value=0.0)) else: - reduce_channels = ReduceSumParameter(sl.log_partition.shape, axis=1) - log_partition = Parameter.from_unary(reduce_channels, sl.log_partition.ref()) + log_partition = sl.log_partition.ref() sl = ConstantValueLayer(sl.num_output_units, log_space=True, value=log_partition) return CircuitBlock.from_layer(sl) @@ -83,11 +80,6 @@ def multiply_embedding_layers(sl1: EmbeddingLayer, sl2: EmbeddingLayer) -> Circu f"Expected Embedding layers to have the same scope," f" but found '{sl1.scope}' and '{sl2.scope}'" ) - if sl1.num_channels != sl2.num_channels: - raise ValueError( - f"Expected Embedding layers to have the number of channels," - f"but found '{sl1.num_channels}' and '{sl2.num_channels}'" - ) if sl1.num_states != sl2.num_states: raise ValueError( f"Expected Embedding layers to have the number of categories," @@ -102,7 +94,6 @@ def multiply_embedding_layers(sl1: EmbeddingLayer, sl2: EmbeddingLayer) -> Circu sl = EmbeddingLayer( sl1.scope, sl1.num_output_units * sl2.num_output_units, - num_channels=sl1.num_channels, num_states=sl1.num_states, weight=weight, ) @@ -115,11 +106,6 @@ def multiply_categorical_layers(sl1: CategoricalLayer, sl2: CategoricalLayer) -> f"Expected Categorical layers to have the same scope," f" but found '{sl1.scope}' and '{sl2.scope}'" ) - if sl1.num_channels != sl2.num_channels: - raise ValueError( - f"Expected Categorical layers to have the number of channels," - f"but found '{sl1.num_channels}' and '{sl2.num_channels}'" - ) if sl1.num_categories != sl2.num_categories: raise ValueError( f"Expected Categorical layers to have the number of categories," @@ -142,7 +128,6 @@ def multiply_categorical_layers(sl1: CategoricalLayer, sl2: CategoricalLayer) -> sl = CategoricalLayer( sl1.scope, 
sl1.num_output_units * sl2.num_output_units, - num_channels=sl1.num_channels, num_categories=sl1.num_categories, logits=sl_logits, ) @@ -155,11 +140,6 @@ def multiply_gaussian_layers(sl1: GaussianLayer, sl2: GaussianLayer) -> CircuitB f"Expected Gaussian layers to have the same scope," f" but found '{sl1.scope}' and '{sl2.scope}'" ) - if sl1.num_channels != sl2.num_channels: - raise ValueError( - f"Expected Gaussian layers to have the number of channels," - f"but found '{sl1.num_channels}' and '{sl2.num_channels}'" - ) mean = Parameter.from_nary( GaussianProductMean(sl1.mean.shape, sl1.stddev.shape, sl2.mean.shape, sl2.stddev.shape), @@ -185,11 +165,11 @@ def multiply_gaussian_layers(sl1: GaussianLayer, sl2: GaussianLayer) -> CircuitB if sl1.log_partition is not None or sl2.log_partition is not None: if sl1.log_partition is None: - log_partition1 = ConstantParameter(sl1.num_output_units, sl1.num_channels, value=0.0) + log_partition1 = ConstantParameter(sl1.num_output_units, value=0.0) else: log_partition1 = sl1.log_partition.ref() if sl2.log_partition is None: - log_partition2 = ConstantParameter(sl2.num_output_units, sl2.num_channels, value=0.0) + log_partition2 = ConstantParameter(sl2.num_output_units, value=0.0) else: log_partition2 = sl2.log_partition.ref() log_partition = Parameter.from_binary( @@ -205,7 +185,6 @@ def multiply_gaussian_layers(sl1: GaussianLayer, sl2: GaussianLayer) -> CircuitB sl = GaussianLayer( sl1.scope, sl1.num_output_units * sl2.num_output_units, - num_channels=sl1.num_channels, mean=mean, stddev=stddev, log_partition=log_partition, @@ -219,11 +198,6 @@ def multiply_polynomial_layers(sl1: PolynomialLayer, sl2: PolynomialLayer) -> Ci f"Expected Polynomial layers to have the same scope," f" but found '{sl1.scope}' and '{sl2.scope}'" ) - if sl1.num_channels != sl2.num_channels: - raise ValueError( - f"Expected Polynomial layers to have the number of channels," - f"but found '{sl1.num_channels}' and '{sl2.num_channels}'" - ) shape1, shape2 = 
sl1.coeff.shape, sl2.coeff.shape coeff = Parameter.from_binary( @@ -235,7 +209,6 @@ def multiply_polynomial_layers(sl1: PolynomialLayer, sl2: PolynomialLayer) -> Ci sl = PolynomialLayer( sl1.scope, sl1.num_output_units * sl2.num_output_units, - num_channels=sl1.num_channels, degree=sl1.degree + sl2.degree, coeff=coeff, ) @@ -264,26 +237,22 @@ def multiply_sum_layers(sl1: SumLayer, sl2: SumLayer) -> CircuitBlock: def differentiate_polynomial_layer( - sl: PolynomialLayer, *, var_idx: int, ch_idx: int, order: int = 1 + sl: PolynomialLayer, *, var_idx: int, order: int = 1 ) -> CircuitBlock: # PolynomialLayer is constructed univariate, but we still take the 2 idx for unified interface - assert (var_idx, ch_idx) == (0, 0), "This should not happen" + assert var_idx == 0, "This should not happen" if order <= 0: raise ValueError("The order of differentiation must be positive.") coeff = Parameter.from_unary( PolynomialDifferential(sl.coeff.shape, order=order), sl.coeff.ref() ) - sl = PolynomialLayer( - sl.scope, sl.num_output_units, sl.num_channels, degree=coeff.shape[-1] - 1, coeff=coeff - ) + sl = PolynomialLayer(sl.scope, sl.num_output_units, degree=coeff.shape[-1] - 1, coeff=coeff) return CircuitBlock.from_layer(sl) def conjugate_embedding_layer(sl: EmbeddingLayer) -> CircuitBlock: weight = Parameter.from_unary(ConjugateParameter(sl.weight.shape), sl.weight.ref()) - sl = EmbeddingLayer( - sl.scope, sl.num_output_units, sl.num_channels, num_states=sl.num_states, weight=weight - ) + sl = EmbeddingLayer(sl.scope, sl.num_output_units, num_states=sl.num_states, weight=weight) return CircuitBlock.from_layer(sl) @@ -293,7 +262,6 @@ def conjugate_categorical_layer(sl: CategoricalLayer) -> CircuitBlock: sl = CategoricalLayer( sl.scope, sl.num_output_units, - sl.num_channels, num_categories=sl.num_categories, logits=logits, probs=probs, @@ -304,15 +272,13 @@ def conjugate_categorical_layer(sl: CategoricalLayer) -> CircuitBlock: def conjugate_gaussian_layer(sl: GaussianLayer) -> 
CircuitBlock: mean = sl.mean.ref() if sl.mean is not None else None stddev = sl.stddev.ref() if sl.stddev is not None else None - sl = GaussianLayer(sl.scope, sl.num_output_units, sl.num_channels, mean=mean, stddev=stddev) + sl = GaussianLayer(sl.scope, sl.num_output_units, mean=mean, stddev=stddev) return CircuitBlock.from_layer(sl) def conjugate_polynomial_layer(sl: PolynomialLayer) -> CircuitBlock: coeff = Parameter.from_unary(ConjugateParameter(sl.coeff.shape), sl.coeff.ref()) - sl = PolynomialLayer( - sl.scope, sl.num_output_units, sl.num_channels, degree=sl.degree, coeff=coeff - ) + sl = PolynomialLayer(sl.scope, sl.num_output_units, degree=sl.degree, coeff=coeff) return CircuitBlock.from_layer(sl) diff --git a/cirkit/symbolic/parameters.py b/cirkit/symbolic/parameters.py index ca701fdc..c927c104 100644 --- a/cirkit/symbolic/parameters.py +++ b/cirkit/symbolic/parameters.py @@ -695,16 +695,11 @@ def __init__( """ assert in_mean1_shape == in_stddev1_shape assert in_mean2_shape == in_stddev2_shape - assert in_mean1_shape[1] == in_mean2_shape[1] - assert in_stddev1_shape[1] == in_stddev2_shape[1] super().__init__(in_mean1_shape, in_stddev1_shape, in_mean2_shape, in_stddev2_shape) @property def shape(self) -> tuple[int, ...]: - return ( - self.in_shapes[0][0] * self.in_shapes[2][0], - self.in_shapes[0][1], - ) + return (self.in_shapes[0][0] * self.in_shapes[2][0],) @property def config(self) -> dict[str, Any]: @@ -731,15 +726,11 @@ def __init__(self, in_stddev1_shape: tuple[int, ...], in_stddev2_shape: tuple[in in_stddev2_shape: The shape of the standard deviations of the second univariate Gaussians. 
""" - assert in_stddev1_shape[1] == in_stddev2_shape[1] super().__init__(in_stddev1_shape, in_stddev2_shape) @property def shape(self) -> tuple[int, ...]: - return ( - self.in_shapes[0][0] * self.in_shapes[1][0], - self.in_shapes[0][1], - ) + return (self.in_shapes[0][0] * self.in_shapes[1][0],) @property def config(self) -> dict[str, Any]: @@ -771,16 +762,11 @@ def __init__( """ assert in_mean1_shape == in_stddev1_shape assert in_mean2_shape == in_stddev2_shape - assert in_mean1_shape[1] == in_mean2_shape[1] - assert in_stddev1_shape[1] == in_stddev2_shape[1] super().__init__(in_mean1_shape, in_stddev1_shape, in_mean2_shape, in_stddev2_shape) @property def shape(self) -> tuple[int, ...]: - return ( - self.in_shapes[0][0] * self.in_shapes[2][0], - self.in_shapes[0][1], - ) + return (self.in_shapes[0][0] * self.in_shapes[2][0],) @property def config(self) -> dict[str, Any]: diff --git a/cirkit/templates/data_modalities.py b/cirkit/templates/data_modalities.py index 20f0d256..85934b7d 100644 --- a/cirkit/templates/data_modalities.py +++ b/cirkit/templates/data_modalities.py @@ -80,19 +80,18 @@ def image_data( raise ValueError(f"Unknown input layer called {input_layer}") # Construct the image-tailored region graph - image_hw = (image_shape[1], image_shape[2]) match region_graph: case "quad-tree-2": - rg = QuadTree(image_hw, num_patch_splits=2) + rg = QuadTree(image_shape, num_patch_splits=2) case "quad-tree-4": - rg = QuadTree(image_hw, num_patch_splits=4) + rg = QuadTree(image_shape, num_patch_splits=4) case "quad-graph": - rg = QuadGraph(image_hw) + rg = QuadGraph(image_shape) case "random-binary-tree": - rg = RandomBinaryTree(np.prod(image_hw)) + rg = RandomBinaryTree(np.prod(image_shape)) case "poon-domingos": - delta = max(np.ceil(image_hw[0] / 8), np.ceil(image_hw[1] / 8)) - rg = PoonDomingos(image_hw, delta=delta) + delta = max(np.ceil(image_shape[1] / 8), np.ceil(image_shape[2] / 8)) + rg = PoonDomingos(image_shape, delta=delta) case _: raise 
ValueError(f"Unknown region graph called {region_graph}") @@ -135,7 +134,6 @@ def image_data( sum_product=sum_product_layer, sum_weight_factory=sum_weight_factory, nary_sum_weight_factory=nary_sum_weight_factory, - num_channels=image_shape[0], num_input_units=num_input_units, num_sum_units=num_sum_units, num_classes=num_classes, diff --git a/cirkit/templates/logic/graph.py b/cirkit/templates/logic/graph.py index f4740236..de2ac35e 100644 --- a/cirkit/templates/logic/graph.py +++ b/cirkit/templates/logic/graph.py @@ -236,7 +236,6 @@ def build_circuit( literal_input_factory: InputLayerFactory = None, negated_literal_input_factory: InputLayerFactory = None, weight_factory: ParameterFactory | None = None, - num_channels: int = 1, enforce_smoothness: bool = True, ) -> Circuit: """Construct a symbolic circuit from a logic circuit graph. @@ -253,7 +252,6 @@ def build_circuit( a symbolic parameter. If None is used, the default weight factory uses non-trainable unitary parameters, which instantiate a regular boolean logic graph. - num_channels: The number of channels for each variable. enforce_smoothness: Enforces smoothness of the circuit to support efficient marginalization. 
@@ -293,12 +291,10 @@ def weight_factory(n: tuple[int]) -> Parameter: for i in self.inputs: match i: case LiteralNode(): - node_to_layer[i] = literal_input_factory( - Scope([i.literal]), num_units=1, num_channels=num_channels - ) + node_to_layer[i] = literal_input_factory(Scope([i.literal]), num_units=1) case NegatedLiteralNode(): node_to_layer[i] = negated_literal_input_factory( - Scope([i.literal]), num_units=1, num_channels=num_channels + Scope([i.literal]), num_units=1 ) for node in self.topological_ordering(): @@ -318,4 +314,4 @@ def weight_factory(n: tuple[int]) -> Parameter: node_to_layer[node] = sum_node layers = list(set(itertools.chain(*in_layers.values())).union(in_layers.keys())) - return Circuit(num_channels, layers, in_layers, [node_to_layer[self.output]]) + return Circuit(layers, in_layers, [node_to_layer[self.output]]) diff --git a/cirkit/templates/logic/sdd.py b/cirkit/templates/logic/sdd.py index 79da00b1..6f5f3472 100644 --- a/cirkit/templates/logic/sdd.py +++ b/cirkit/templates/logic/sdd.py @@ -1,6 +1,5 @@ -import itertools import re -from collections import defaultdict, deque +from collections import defaultdict from itertools import chain from cirkit.templates.logic.graph import ( @@ -15,18 +14,6 @@ ) -def sliding_window(iterable, n): - """Collect data into overlapping fixed-length chunks or blocks. 
- taken from https://docs.python.org/3/library/itertools.html - """ - # sliding_window('ABCDEFG', 4) → ABCD BCDE CDEF DEFG - iterator = iter(iterable) - window = deque(itertools.islice(iterator, n - 1), maxlen=n) - for x in iterator: - window.append(x) - yield tuple(window) - - class SDD(LogicalCircuit): @staticmethod def load(filename: str) -> "SDD": diff --git a/cirkit/templates/logic/utils.py b/cirkit/templates/logic/utils.py index 45d5ef48..1b41a4e2 100644 --- a/cirkit/templates/logic/utils.py +++ b/cirkit/templates/logic/utils.py @@ -20,15 +20,14 @@ def default_literal_input_factory(negated: bool = False) -> InputLayerFactory: InputLayerFactory: The input layer factory. """ - def input_factory(scope: Scope, num_units: int, num_channels: int) -> InputLayer: + def input_factory(scope: Scope, num_units: int) -> InputLayer: param = np.array([1.0, 0.0]) if negated else np.array([0.0, 1.0]) initializer = ConstantTensorInitializer(param) return CategoricalLayer( scope, num_categories=2, num_output_units=num_units, - num_channels=num_channels, - probs=Parameter.from_input(TensorParameter(1, 1, 2, initializer=initializer)), + probs=Parameter.from_input(TensorParameter(1, 2, initializer=initializer)), ) return input_factory diff --git a/cirkit/templates/region_graph/algorithms/poon_domingos.py b/cirkit/templates/region_graph/algorithms/poon_domingos.py index 5f41540b..a5a00e5f 100644 --- a/cirkit/templates/region_graph/algorithms/poon_domingos.py +++ b/cirkit/templates/region_graph/algorithms/poon_domingos.py @@ -12,17 +12,15 @@ from cirkit.utils.scope import Scope -# TODO: too-complex,too-many-locals. how to solve? # DISABLE: We use function name with upper case to mimic a class constructor. 
# pylint: disable-next=invalid-name,too-complex,too-many-locals def PoonDomingos( - shape: Sequence[int], + shape: tuple[int, int, int], *, delta: float | list[float] | list[list[float]], - axes: Sequence[int] | None = None, max_depth: int | None = None, ) -> RegionGraph: - """Constructs a region graph with the Poon-Domingos structure. + r"""Constructs a region graph with the Poon-Domingos structure. See: Sum-Product Networks: A New Deep Architecture. @@ -30,20 +28,18 @@ def PoonDomingos( UAI 2011. Args: - shape (Sequence[int]): The shape of the hypercube for the variables. - delta (Union[float, List[float], List[List[float]]]): The deltas to cut the hypercube, can \ - be: a single cut delta for all axes, a list for all axes, a list of list for each \ + shape: The image shape $(C, H, W)$, where $H$ is the height, $W$ is the width, + and $C$ is the number of channels. + delta: The deltas to cut the hypercube, can + be: a single cut delta for all axes, a list for all axes, a list of list for each axis. If the last case, all inner lists must have the same length as axes. - axes (Optional[Sequence[int]], optional): The axes to cut. Default means all axes. \ - Defaults to None. - max_depth (Optional[int], optional): The max depth for cutting, omit for unconstrained. \ + max_depth: The max depth for cutting, omit for unconstrained. Defaults to None. Returns: RegionGraph: The Poon-Domingos region grpah. """ - if axes is None: - axes = tuple(range(len(shape))) + axes = (1, 2) # The axes to cut, i.e., the height and width axes. cut_points = _parse_poon_domingos_delta(delta, shape, axes) if max_depth is None: @@ -65,7 +61,7 @@ def PoonDomingos( queue: deque[HyperCube] = deque() depth_dict: dict[HyperCube, int] = {} # Also serve as a "visited" set. 
- cur_hypercube = ((0,) * len(shape), tuple(shape)) + cur_hypercube = ((0,) * len(shape), shape) root_scope = hypercube_to_scope[cur_hypercube] root = RegionNode(root_scope) nodes.append(root) diff --git a/cirkit/templates/region_graph/algorithms/quad.py b/cirkit/templates/region_graph/algorithms/quad.py index 8048e241..efa10f91 100644 --- a/cirkit/templates/region_graph/algorithms/quad.py +++ b/cirkit/templates/region_graph/algorithms/quad.py @@ -12,11 +12,12 @@ # pylint: disable-next=invalid-name -def QuadTree(shape: tuple[int, int], *, num_patch_splits: int = 2) -> RegionGraph: - """Constructs a Quad Tree region graph. +def QuadTree(shape: tuple[int, int, int], *, num_patch_splits: int = 2) -> RegionGraph: + r"""Constructs a Quad Tree region graph. Args: - shape: The image shape (H, W), where H is the height and W is the width. + shape: The image shape $(C, H, W)$, where $H$ is the height, $W$ is the width, + and $C$ is the number of channels. num_patch_splits: The number of splits per patitioning, it can be either 2 or 4. Returns: @@ -30,11 +31,12 @@ def QuadTree(shape: tuple[int, int], *, num_patch_splits: int = 2) -> RegionGrap # pylint: disable-next=invalid-name -def QuadGraph(shape: tuple[int, int]) -> RegionGraph: - """Constructs a Quad Graph region graph. +def QuadGraph(shape: tuple[int, int, int]) -> RegionGraph: + r"""Constructs a Quad Graph region graph. Args: - shape: The image shape (H, W), where H is the height and W is the width. + shape: The image shape $(C, H, W)$, where $H$ is the height, $W$ is the width, + and $C$ is the number of channels. Returns: RegionGraph: A Quad Graph region graph. @@ -47,35 +49,33 @@ def QuadGraph(shape: tuple[int, int]) -> RegionGraph: # pylint: disable-next=invalid-name def _QuadBuilder( - shape: tuple[int, int], *, is_tree: bool = False, num_patch_splits: int = 2 + shape: tuple[int, int, int], *, is_tree: bool = False, num_patch_splits: int = 2 ) -> RegionGraph: - """Construct a RG with a quad tree. 
+ r"""Construct a RG with a quad tree. Args: - shape (Tuple[int, int]): The shape of the image, in (H, W). - is_tree (bool, optional): Whether the RG needs to be \ + shape: The image shape $(C, H, W)$, where $H$ is the height, $W$ is the width, + and $C$ is the number of channels. + is_tree: Whether the RG needs to be \ structured-decomposable. Defaults to False. - num_patch_splits (int): The number of patches to split. It can be either 2 or 4. + num_patch_splits: The number of patches to split. It can be either 2 or 4. This is used only when is_tree is True. Returns: - RegionGraph: The QT RG. + RegionGraph: A region graph. Raises: ValueError: The image shape is not valid. ValueError: The number of patches to split is not valid. """ - if len(shape) != 2: - raise ValueError("Quad Tree and Quad Graph region graphs only works for 2D images") - height, width = shape - if height <= 0 or width <= 0: - raise ValueError("Height and width must be positive integers") + if len(shape) != 3: + raise ValueError("Quad Tree and Quad Graph region graphs only works for images") + num_channels, height, width = shape + if num_channels <= 0 or height <= 0 or width <= 0: + raise ValueError("The number of channels, the height and the width must be positive") if is_tree and num_patch_splits not in [2, 4]: raise ValueError("The number of patches to split must be either 2 or 4") - # An object mapping rectangles of coordinates into variable scopes - hypercube_to_scope = HypercubeToScope(shape) - # Padding using Scope({num_var}) which is one larger than range(num_var). # DISABLE: This is considered a constant here, although RegionNode is mutable. 
PADDING = RegionNode({height * width}) # pylint: disable=invalid-name @@ -88,9 +88,12 @@ def _QuadBuilder( # A map to each region/partition node to its children in_nodes: dict[RegionGraphNode, list[RegionGraphNode]] = defaultdict(list) - # Add univariate input region nodes + # Add input region nodes + # An object mapping rectangles of coordinates into variable scopes + hypercube_to_scope = HypercubeToScope(shape) for i, j in itertools.product(range(height), range(width)): - rgn = RegionNode(hypercube_to_scope[((i, j), (i + 1, j + 1))]) + scope = hypercube_to_scope[((0, i, j), (num_channels, i + 1, j + 1))] + rgn = RegionNode(scope) grid[i][j] = rgn nodes.append(rgn) diff --git a/cirkit/templates/region_graph/algorithms/utils.py b/cirkit/templates/region_graph/algorithms/utils.py index 64500b10..1c21cf4f 100644 --- a/cirkit/templates/region_graph/algorithms/utils.py +++ b/cirkit/templates/region_graph/algorithms/utils.py @@ -1,6 +1,4 @@ -import math from collections import defaultdict -from collections.abc import Sequence import numpy as np @@ -25,41 +23,39 @@ class HypercubeToScope(dict[HyperCube, Scope]): - If it's not in the dict yet, the scope is calculated and cached to the dict. """ - def __init__(self, shape: Sequence[int]) -> None: - """Init class. - + def __init__(self, shape: tuple[int, int, int]) -> None: + r"""Initialize a hypercube to scope object. Note that this does not accept initial elements and is initialized empty. Args: - shape (Sequence[int]): The shape of the whole hypercube. + shape: The image shape $(C, H, W)$, where $H$ is the height, $W$ is the width, + and $C$ is the number of channels. """ super().__init__() self.ndims = len(shape) - self.shape = tuple(shape) - # We assume it's feasible to save the whole hypercube, since it should be the whole region. + self.shape = shape # ANNOTATE: Numpy has typing issues. 
- self.hypercube = np.arange(math.prod(shape), dtype=np.int64).reshape(shape) + self.hypercube = np.arange(np.prod(shape), dtype=np.int64).reshape(shape) def __missing__(self, key: HyperCube) -> Scope: """Construct the item when not exist in the dict. Args: - key (HyperCube): The key that is missing from the dict, i.e., a hypercube that is \ + key: The key that is missing from the dict, i.e., a hypercube that is visited for the first time. Returns: Scope: The value for the key, i.e., the corresponding scope. + + Raises: + ValueError: If the hyper-cube key has incorrect shape, or if it's empty. """ point1, point2 = key # HyperCube is from point1 to point2. - assert ( - len(point1) == len(point2) == self.ndims - ), "The dimension of the HyperCube is not correct." - assert all( - 0 <= x1 < x2 <= shape for x1, x2, shape in zip(point1, point2, self.shape) - ), "The HyperCube is empty." - - # IGNORE: Numpy has typing issues. + if not (len(point1) == len(point2) == self.ndims): + raise ValueError("The dimension of the HyperCube is not correct") + if not all(0 <= x1 < x2 <= shape for x1, x2, shape in zip(point1, point2, self.shape)): + raise ValueError("The HyperCube is empty") return Scope( self.hypercube[ # type: ignore[misc] tuple(slice(x1, x2) for x1, x2 in zip(point1, point2)) diff --git a/cirkit/templates/region_graph/graph.py b/cirkit/templates/region_graph/graph.py index 79fd66c8..0111c562 100644 --- a/cirkit/templates/region_graph/graph.py +++ b/cirkit/templates/region_graph/graph.py @@ -347,7 +347,6 @@ def build_circuit( nary_sum_weight_factory: ParameterFactory | None = None, sum_factory: SumLayerFactory | None = None, prod_factory: ProductLayerFactory | None = None, - num_channels: int = 1, num_input_units: int = 1, num_sum_units: int = 1, num_classes: int = 1, @@ -373,7 +372,6 @@ def build_circuit( the given sum_weight_factory. sum_factory: A factory that builds a sum layer. It can be None. prod_factory: A factory that builds a product layer. It can be None. 
- num_channels: The number of channels for each variable. num_input_units: The number of input units. num_sum_units: The number of sum units per sum layer. num_classes: The number of output classes. @@ -520,14 +518,13 @@ def build_tucker_(rgn: RegionNode, rgn_partitioning: Sequence[RegionNode]) -> Su # Input region node if factorize_multivariate and len(node.scope) > 1: factorized_input_sls = [ - input_factory(Scope([sc]), num_input_units, num_channels) - for sc in node.scope + input_factory(Scope([sc]), num_input_units) for sc in node.scope ] input_sl = HadamardLayer(num_input_units, arity=len(factorized_input_sls)) layers.extend(factorized_input_sls) in_layers[input_sl] = factorized_input_sls else: - input_sl = input_factory(node.scope, num_input_units, num_channels) + input_sl = input_factory(node.scope, num_input_units) num_units = num_sum_units if self.region_outputs(node) else num_classes if sum_factory is None: layers.append(input_sl) @@ -575,4 +572,4 @@ def build_tucker_(rgn: RegionNode, rgn_partitioning: Sequence[RegionNode]) -> Su node_to_layer[node] = mix_sl outputs = [node_to_layer[rgn] for rgn in self.outputs] - return Circuit(num_channels, layers, in_layers, outputs) + return Circuit(layers, in_layers, outputs) diff --git a/cirkit/templates/tensor_factorizations.py b/cirkit/templates/tensor_factorizations.py index 29fcd5b8..0596766b 100644 --- a/cirkit/templates/tensor_factorizations.py +++ b/cirkit/templates/tensor_factorizations.py @@ -126,14 +126,11 @@ def cp( embedding_layer_factories: list[InputLayerFactory] = [ _input_layer_factory_builder(input_layer, dim, factor_param_kwargs) for dim in shape ] - embedding_layers = [ - f(Scope([i]), rank, num_channels=1) for i, f in enumerate(embedding_layer_factories) - ] + embedding_layers = [f(Scope([i]), rank) for i, f in enumerate(embedding_layer_factories)] hadamard_layer = HadamardLayer(rank, arity=len(shape)) sum_layer = SumLayer(rank, 1, arity=1, weight=weight, weight_factory=weight_factory) return 
Circuit( - 1, layers=embedding_layers + [hadamard_layer, sum_layer], in_layers={sum_layer: [hadamard_layer], hadamard_layer: embedding_layers}, outputs=[sum_layer], @@ -218,14 +215,11 @@ def tucker( embedding_layer_factories: list[InputLayerFactory] = [ _input_layer_factory_builder(input_layer, dim, factor_param_kwargs) for dim in shape ] - embedding_layers = [ - f(Scope([i]), rank, num_channels=1) for i, f in enumerate(embedding_layer_factories) - ] + embedding_layers = [f(Scope([i]), rank) for i, f in enumerate(embedding_layer_factories)] kronecker_layer = KroneckerLayer(rank, arity=len(shape)) sum_layer = SumLayer(cast(int, rank ** len(shape)), 1, arity=1, weight_factory=weight_factory) return Circuit( - 1, layers=embedding_layers + [kronecker_layer, sum_layer], in_layers={sum_layer: [kronecker_layer], kronecker_layer: embedding_layers}, outputs=[sum_layer], @@ -291,14 +285,14 @@ def tensor_train( # Construct the first, last, and inner embedding layers first_embedding = EmbeddingLayer( - Scope([0]), rank, 1, num_states=shape[0], weight_factory=embedding_factory + Scope([0]), rank, num_states=shape[0], weight_factory=embedding_factory ) last_embedding = EmbeddingLayer( - Scope([len(shape) - 1]), rank, 1, num_states=shape[-1], weight_factory=embedding_factory + Scope([len(shape) - 1]), rank, num_states=shape[-1], weight_factory=embedding_factory ) inner_embeddings = [ [ - EmbeddingLayer(Scope([i]), rank, 1, num_states=dim, weight_factory=embedding_factory) + EmbeddingLayer(Scope([i]), rank, num_states=dim, weight_factory=embedding_factory) for _ in range(rank) ] for i, dim in enumerate(shape[1:-1], start=1) @@ -351,7 +345,6 @@ def tensor_train( # Instantiate and return the circuit return Circuit( - 1, layers=layers, in_layers=in_layers, outputs=[cur_sl], diff --git a/cirkit/templates/utils.py b/cirkit/templates/utils.py index 1f636e78..c16d706c 100644 --- a/cirkit/templates/utils.py +++ b/cirkit/templates/utils.py @@ -51,13 +51,12 @@ class Parameterization: class 
InputLayerFactory(Protocol): # pylint: disable=too-few-public-methods """The protocol of a factory that constructs input layers.""" - def __call__(self, scope: Scope, num_units: int, num_channels: int) -> InputLayer: + def __call__(self, scope: Scope, num_units: int) -> InputLayer: """Constructs an input layer. Args: scope: The scope of the layer. num_units: The number of input units composing the layer. - num_channels: The number of channel variables. Returns: InputLayer: An input layer. diff --git a/notebooks/compilation-options.ipynb b/notebooks/compilation-options.ipynb index 4058c8dc..92d3d342 100644 --- a/notebooks/compilation-options.ipynb +++ b/notebooks/compilation-options.ipynb @@ -203,8 +203,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 4.74 s, sys: 1.13 s, total: 5.87 s\n", - "Wall time: 5.76 s\n" + "CPU times: user 4.46 s, sys: 998 ms, total: 5.46 s\n", + "Wall time: 5.38 s\n" ] } ], @@ -249,7 +249,7 @@ } ], "source": [ - "batch = torch.randint(256, size=(1, 1, 784), device=device)\n", + "batch = torch.randint(256, size=(1, 784), device=device)\n", "circuit(batch).item()" ] }, @@ -273,13 +273,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "1.42 s ± 16.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + "1.37 s ± 24.3 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] } ], "source": [ "%%timeit\n", - "batch = torch.randint(256, size=(128, 1, 784), device=device)\n", + "batch = torch.randint(256, size=(128, 784), device=device)\n", "circuit(batch)\n", "if 'cuda' in str(device):\n", " torch.cuda.synchronize(device)" @@ -338,8 +338,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 4.98 s, sys: 809 ms, total: 5.79 s\n", - "Wall time: 5.69 s\n" + "CPU times: user 4.6 s, sys: 1.01 s, total: 5.62 s\n", + "Wall time: 5.54 s\n" ] } ], @@ -420,7 +420,7 @@ "id": "f074e168-dee4-4234-8eae-afd28fae317f", "metadata": {}, "source": [ - "As we see in the next code snippet, enabling folding provided an (approximately) **28.9x speed-up** for feed-forward circuit evaluations." + "As we see in the next code snippet, enabling folding provided an (approximately) **18.1x speed-up** for feed-forward circuit evaluations." ] }, { @@ -433,13 +433,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "49.1 ms ± 33 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + "75.8 ms ± 7.76 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], "source": [ "%%timeit\n", - "batch = torch.randint(256, size=(128, 1, 784), device=device)\n", + "batch = torch.randint(256, size=(128, 784), device=device)\n", "folded_circuit(batch)\n", "if 'cuda' in str(device):\n", " torch.cuda.synchronize(device)" @@ -527,8 +527,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 5.06 s, sys: 1.07 s, total: 6.12 s\n", - "Wall time: 6.02 s\n" + "CPU times: user 4.78 s, sys: 1.01 s, total: 5.79 s\n", + "Wall time: 5.71 s\n" ] } ], @@ -591,13 +591,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "25.4 ms ± 8.21 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + "38.6 ms ± 5.62 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n" ] } ], "source": [ "%%timeit\n", - "batch = torch.randint(256, size=(128, 1, 784), device=device)\n", + "batch = torch.randint(256, size=(128, 784), device=device)\n", "optimized_circuit(batch)\n", "if 'cuda' in str(device):\n", " torch.cuda.synchronize(device)" @@ -608,8 +608,16 @@ "id": "11d95c02-2c66-4414-b676-0dec303f2aa9", "metadata": {}, "source": [ - "Note that, we achieved an (approximately) **1.9x speed-up**, when compared to the folded circuit compiled above, and an (approximately) **55.9x speed-up**, when compared to the circuit compiled with no folding and no optimizations." + "Note that, we achieved an (approximately) **2.0x speed-up**, when compared to the folded circuit compiled above, and an (approximately) **35.5x speed-up**, when compared to the circuit compiled with no folding and no optimizations." ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3203f891-ad64-4727-9ede-529d1215dc2a", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/notebooks/compression-cp-factorization.ipynb b/notebooks/compression-cp-factorization.ipynb index c63942ec..916a33fe 100644 --- a/notebooks/compression-cp-factorization.ipynb +++ b/notebooks/compression-cp-factorization.ipynb @@ -266,15 +266,14 @@ "\n", "for epoch_idx in range(num_epochs):\n", " for i, (batch,) in enumerate(train_dataloader):\n", - " # The circuit expects an input of shape (batch_dim, num_channels, num_variables),\n", - " # so we unsqueeze a dimension for the channel.\n", - " batch = batch.to(device).unsqueeze(dim=1)\n", + " # The circuit expects an input of shape (batch_dim, num_variables),\n", + " batch = batch.to(device)\n", "\n", " # Compute the value of the tensor at the indices in the batch\n", " values = circuit(batch) # shape (batch_dim, 1, 1)\n", " \n", " # We take the MSE as loss\n", - " target_values = original_image[batch[:, 0, 0], batch[:, 0, 1], batch[:, 0, 2]]\n", + " target_values = 
original_image[batch[:, 0], batch[:, 1], batch[:, 2]]\n", " loss = torch.mean(torch.square(target_values - values[:, 0, 0]))\n", " loss.backward()\n", "\n", @@ -299,13 +298,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "adad2989-4221-45f5-af3b-81740fdbd14a", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -321,7 +320,7 @@ "circuit.eval()\n", "\n", "# Reconstruct the image\n", - "recon_image = circuit(torch.from_numpy(image_indices).to(device).unsqueeze(dim=1))\n", + "recon_image = circuit(torch.from_numpy(image_indices).to(device))\n", "recon_image = recon_image.squeeze(dim=2).squeeze(dim=1).view(original_image.shape)\n", "recon_image = (recon_image - recon_image.min()) / (recon_image.max() - recon_image.min())\n", "\n", diff --git a/notebooks/generative-vs-discriminative-circuit.ipynb b/notebooks/generative-vs-discriminative-circuit.ipynb index 94e4bf65..d4943eb1 100644 --- a/notebooks/generative-vs-discriminative-circuit.ipynb +++ b/notebooks/generative-vs-discriminative-circuit.ipynb @@ -387,7 +387,7 @@ "from cirkit.pipeline import compile\n", "\n", "# Set the torch device to use\n", - "device = torch.device('cuda')\n", + "device = torch.device('cuda:1')\n", "\n", "max_num_epochs = 10\n", "eval_every = 200\n", @@ -457,11 +457,10 @@ " assert len(mm) in (xx.shape[0], 1)\n", " log_probs = self.marginal_query(xx, integrate_vars=mm) \n", "\n", - " batch_size, num_channels, num_classes = log_probs.shape\n", - " assert num_channels == 1\n", + " batch_size, _, num_classes = log_probs.shape\n", " assert num_classes > 1\n", "\n", - " # Remove channel dim (which is one)\n", + " # Remove number of output vectors dim (which is one)\n", " log_probs = log_probs.squeeze(dim=1)\n", "\n", " gen_loss = self.generative_loss(log_probs, yy, marginalize=False)\n", @@ -521,8 +520,7 @@ " # Set some seeds\n", " np.random.seed(42)\n", " torch.manual_seed(42)\n", - "\n", - " # Compile the circuit\n", + " \n", " cc = compile(circuit)\n", " # Move the circuit to chosen device\n", " cc = cc.to(device)\n", @@ -552,18 +550,17 @@ " epoch_idx = 0\n", " while epoch_idx < max_num_epochs and patience > 0:\n", " for i, ((inputs), labels) in enumerate(train_dataloader):\n", - " # The circuit expects an input of shape (batch_dim, num_channels, num_variables),\n", - " # so we unsqueeze a dimension 
for the channel.\n", + " # The circuit expects an input of shape (batch_dim, num_variables)\n", " BS = labels.shape[0]\n", " \n", - " images = inputs['images'].view(BS, 1, -1).to(device)\n", + " images = inputs['images'].view(BS, -1).to(device)\n", " labels = labels.view(BS).to(device)\n", " masks = inputs.get('masks', None)\n", " if masks is not None:\n", " masks = masks.to(device)\n", " \n", " result = model(images=images, labels=labels, masks=masks)\n", - " \n", + "\n", " loss = result['loss']\n", " loss.backward()\n", " \n", @@ -633,7 +630,7 @@ "\n", " BS = labels.shape[0]\n", " \n", - " images = inputs['images'].view(BS, 1, -1).to(device)\n", + " images = inputs['images'].view(BS, -1).to(device)\n", " labels = labels.view(BS).to(device)\n", " masks = inputs.get('masks', None)\n", " if masks is not None:\n", @@ -700,7 +697,7 @@ "\n", "Training QuadGraph $\\lambda=$1.00...\n", "\n", - "\tEpoch 9 Step 1800: Train Loss: 0.003 Train Acc: 99.98% | Valid Loss: 0.229 Valid Acc: 93.50% | patience: 2\r" + "\tEpoch 9 Step 1800: Train Loss: 0.003 Train Acc: 99.98% | Valid Loss: 0.229 Valid Acc: 93.50% | patience: 2" ] }, { @@ -739,6 +736,13 @@ "id": "9287941b", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.\n" + ] + }, { "data": { "text/html": [ @@ -947,7 +951,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "cb642be0-4a7e-4836-b8e5-e5d76f49255b", "metadata": {}, "outputs": [], @@ -968,7 +972,7 @@ " m_data_test = datasets.MNIST('datasets', train=False, download=True, transform=mask_transform(p))\n", "\n", " # Instantiate the training and testing data loaders\n", - " mask_test_dataloader = DataLoader(m_data_test, shuffle=False, batch_size=2048, num_workers=4)\n", + " mask_test_dataloader = DataLoader(m_data_test, shuffle=False, 
batch_size=2048)\n", "\n", " for k, model in models.items():\n", " stats = eval_model(model, mask_test_dataloader, mode='test')\n", @@ -979,13 +983,13 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "92fe5501-cb97-4c83-ad3f-a162ec844d99", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -1058,13 +1062,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "22c0e65d-7381-44ee-832d-b9288c183c9c", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -1095,7 +1099,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "id": "54b16691-ff71-460e-a75f-d91e10927978", "metadata": {}, "outputs": [], @@ -1110,7 +1114,7 @@ " ls = str(loader)\n", " for inputs, labels in loader:\n", " BS = labels.shape[0]\n", - " images = inputs['images'].view(BS, 1, -1).to(device)\n", + " images = inputs['images'].view(BS, -1).to(device)\n", " labels = labels.view(BS).to(device)\n", " log_probs = model(images=images, labels=labels)['image_log_probs']\n", " log_probs = log_probs.detach().cpu().numpy()\n", @@ -1137,7 +1141,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "id": "44c04699-deae-4705-a3e5-e486a99c8f7e", "metadata": {}, "outputs": [ diff --git a/notebooks/learning-a-circuit-with-pic.ipynb b/notebooks/learning-a-circuit-with-pic.ipynb index 0d8bd61c..9d67b878 100644 --- a/notebooks/learning-a-circuit-with-pic.ipynb +++ b/notebooks/learning-a-circuit-with-pic.ipynb @@ -79,19 +79,19 @@ "output_type": "stream", "text": [ "TorchCategoricalLayer(\n", - " folds: 784 channels: 1 variables: 1 output-units: 64\n", + " folds: 784 variables: 1 output-units: 64\n", " input-shape: (784, 1, -1, 1)\n", " output-shape: (784, -1, 64)\n", " (probs): TorchParameter(\n", - " shape: (784, 64, 1, 256)\n", - " (0): TorchTensorParameter(output-shape: (784, 64, 1, 256))\n", + " shape: (784, 64, 256)\n", + " (0): TorchTensorParameter(output-shape: (784, 64, 256))\n", " (1): TorchSoftmaxParameter(\n", - " input-shapes: [(784, 64, 1, 256)]\n", - " output-shape: (784, 64, 1, 256)\n", + " input-shapes: [(784, 64, 256)]\n", + " output-shape: (784, 64, 256)\n", " )\n", " )\n", ")\n", - "torch.Size([784, 64, 1, 256])\n" + "torch.Size([784, 64, 256])\n" ] } ], @@ -192,13 +192,13 @@ "output_type": "stream", "text": [ "TorchCategoricalLayer(\n", - " folds: 784 channels: 1 variables: 1 output-units: 64\n", + " folds: 784 variables: 1 output-units: 64\n", " input-shape: (784, 1, -1, 1)\n", " 
output-shape: (784, -1, 64)\n", " (probs): PICInputNet(\n", " (reparam): TorchSoftmaxParameter(\n", - " input-shapes: [(784, 64, 1, 256)]\n", - " output-shape: (784, 64, 1, 256)\n", + " input-shapes: [(784, 64, 256)]\n", + " output-shape: (784, 64, 256)\n", " )\n", " (net): Sequential(\n", " (0): FourierLayer(1, 256, sigma=1.0)\n", @@ -208,7 +208,7 @@ " )\n", " )\n", ")\n", - "torch.Size([784, 64, 1, 256])\n" + "torch.Size([784, 64, 256])\n" ] } ], @@ -327,17 +327,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "Step 200: Average NLL: 798.530\n", - "Step 400: Average NLL: 702.238\n", - "Step 600: Average NLL: 685.940\n", - "Step 800: Average NLL: 679.610\n", - "Step 1000: Average NLL: 673.089\n", - "Step 1200: Average NLL: 661.166\n", - "Step 1400: Average NLL: 656.975\n", - "Step 1600: Average NLL: 654.494\n", - "Step 1800: Average NLL: 653.448\n", - "Step 2000: Average NLL: 651.315\n", - "Step 2200: Average NLL: 650.697\n" + "Step 200: Average NLL: 798.034\n", + "Step 400: Average NLL: 699.926\n", + "Step 600: Average NLL: 684.053\n", + "Step 800: Average NLL: 677.998\n", + "Step 1000: Average NLL: 671.159\n", + "Step 1200: Average NLL: 661.711\n", + "Step 1400: Average NLL: 658.074\n", + "Step 1600: Average NLL: 655.282\n", + "Step 1800: Average NLL: 653.680\n", + "Step 2000: Average NLL: 651.430\n", + "Step 2200: Average NLL: 650.717\n" ] } ], @@ -354,7 +354,7 @@ " for i, (batch, _) in enumerate(train_dataloader):\n", " # The circuit expects an input of shape (batch_dim, num_channels, num_variables),\n", " # so we unsqueeze a dimension for the channel.\n", - " batch = batch.to(device).unsqueeze(dim=1)\n", + " batch = batch.to(device)\n", "\n", " # Compute the log-likelihoods of the batch, by evaluating the circuit\n", " log_likelihoods = circuit(batch)\n", @@ -385,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "4e66bd8b", "metadata": {}, "outputs": [ @@ -393,8 +393,8 @@ "name": "stdout", "output_type": 
"stream", "text": [ - "Average test LL: -645.790\n", - "Bits per dimension: 1.188\n" + "Average test LL: -645.984\n", + "Bits per dimension: 1.189\n" ] } ], @@ -405,7 +405,7 @@ " for batch, _ in test_dataloader:\n", " # The circuit expects an input of shape (batch_dim, num_channels, num_variables),\n", " # so we unsqueeze a dimension for the channel.\n", - " batch = batch.to(device).unsqueeze(dim=1)\n", + " batch = batch.to(device)\n", "\n", " # Compute the log-likelihoods of the batch\n", " log_likelihoods = circuit(batch)\n", diff --git a/notebooks/learning-a-circuit.ipynb b/notebooks/learning-a-circuit.ipynb index 64d17023..a55d04f2 100644 --- a/notebooks/learning-a-circuit.ipynb +++ b/notebooks/learning-a-circuit.ipynb @@ -70,7 +70,7 @@ "id": "aa8c6e7c-ad9f-4dd2-ab76-602e191d197b", "metadata": {}, "source": [ - "We can query some information regarding the symbolic circuit, such as the number of variables and channels it is defined on, and which structural properties it does satisfy." + "We can query some information regarding the symbolic circuit, such as the number of variables it is defined on, and which structural properties it does satisfy." 
] }, { @@ -89,7 +89,6 @@ "output_type": "stream", "text": [ "Number of variables: 784\n", - "Number of channels per variable: 1\n", "\n", "Structural properties:\n", " - Smoothness: True\n", @@ -101,7 +100,6 @@ "source": [ "# Print some information\n", "print(f'Number of variables: {symbolic_circuit.num_variables}')\n", - "print(f'Number of channels per variable: {symbolic_circuit.num_channels}')\n", "print()\n", "\n", "# Print which structural properties the circuit satisfies\n", @@ -176,8 +174,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 3.54 s, sys: 292 ms, total: 3.84 s\n", - "Wall time: 3.75 s\n" + "CPU times: user 3.1 s, sys: 332 ms, total: 3.43 s\n", + "Wall time: 3.38 s\n" ] } ], @@ -283,7 +281,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "2f28e9c0", "metadata": { "ExecuteTime": { @@ -296,17 +294,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "Step 200: Average NLL: 2492.162\n", - "Step 400: Average NLL: 895.924\n", - "Step 600: Average NLL: 785.733\n", - "Step 800: Average NLL: 749.979\n", - "Step 1000: Average NLL: 729.827\n", - "Step 1200: Average NLL: 716.521\n", - "Step 1400: Average NLL: 707.093\n", - "Step 1600: Average NLL: 698.421\n", - "Step 1800: Average NLL: 693.506\n", - "Step 2000: Average NLL: 687.055\n", - "Step 2200: Average NLL: 684.551\n" + "Step 200: Average NLL: 2491.168\n", + "Step 400: Average NLL: 896.262\n", + "Step 600: Average NLL: 786.486\n", + "Step 800: Average NLL: 749.341\n", + "Step 1000: Average NLL: 729.653\n", + "Step 1200: Average NLL: 716.721\n", + "Step 1400: Average NLL: 706.373\n", + "Step 1600: Average NLL: 698.175\n", + "Step 1800: Average NLL: 690.445\n", + "Step 2000: Average NLL: 686.382\n", + "Step 2200: Average NLL: 681.392\n" ] } ], @@ -321,9 +319,8 @@ "\n", "for epoch_idx in range(num_epochs):\n", " for i, (batch, _) in enumerate(train_dataloader):\n", - " # The circuit expects an input of shape (batch_dim, num_channels, 
num_variables),\n", - " # so we unsqueeze a dimension for the channel.\n", - " batch = batch.to(device).unsqueeze(dim=1)\n", + " # The circuit expects an input of shape (batch_dim, num_variables)\n", + " batch = batch.to(device)\n", "\n", " # Compute the log-likelihoods of the batch, by evaluating the circuit\n", " log_likelihoods = circuit(batch)\n", @@ -354,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "4e66bd8b", "metadata": { "ExecuteTime": { @@ -367,8 +364,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Average test LL: -683.001\n", - "Bits per dimension: 1.257\n" + "Average test LL: -680.521\n", + "Bits per dimension: 1.252\n" ] } ], @@ -379,7 +376,7 @@ " for batch, _ in test_dataloader:\n", " # The circuit expects an input of shape (batch_dim, num_channels, num_variables),\n", " # so we unsqueeze a dimension for the channel.\n", - " batch = batch.to(device).unsqueeze(dim=1)\n", + " batch = batch.to(device)\n", "\n", " # Compute the log-likelihoods of the batch\n", " log_likelihoods = circuit(batch)\n", diff --git a/notebooks/logic-circuits.ipynb b/notebooks/logic-circuits.ipynb index 5e3ceccf..84afcf29 100644 --- a/notebooks/logic-circuits.ipynb +++ b/notebooks/logic-circuits.ipynb @@ -160,65 +160,65 @@ "\n", "%3\n", "\n", - "\n", + "\n", "\n", - "140165727826944\n", - "\n", - "¬1\n", - "\n", - "\n", - "\n", - "140165727827424\n", - "\n", - "+\n", - "\n", - "\n", - "\n", - "140165727826944->140165727827424\n", - "\n", - "\n", + "140134227331584\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "140165727827472\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140165727826608\n", - "\n", - "0\n", + "140134227331056\n", + "\n", + "0\n", "\n", - "\n", + "\n", "\n", - "140165727826608->140165727827472\n", - "\n", - "\n", + "140134227331056->140134227331584\n", + "\n", + "\n", "\n", - "\n", + "\n", + "\n", + "140134227331536\n", + "\n", + "+\n", + "\n", + "\n", "\n", - "140165727827424->140165727827472\n", 
- "\n", - "\n", + "140134227331536->140134227331584\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "140134227330720\n", + "\n", + "¬1\n", "\n", - "\n", + "\n", + "\n", + "140134227330720->140134227331536\n", + "\n", + "\n", + "\n", + "\n", "\n", - "140165727827280\n", - "\n", - "2\n", + "140134227331392\n", + "\n", + "2\n", "\n", - "\n", + "\n", "\n", - "140165727827280->140165727827424\n", - "\n", - "\n", + "140134227331392->140134227331536\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -257,30 +257,30 @@ "text/plain": [ "TorchCircuit(\n", " (0): TorchCategoricalLayer(\n", - " folds: 1 channels: 1 variables: 1 output-units: 1\n", + " folds: 1 variables: 1 output-units: 1\n", " input-shape: (1, 1, -1, 1)\n", " output-shape: (1, -1, 1)\n", " (probs): TorchParameter(\n", - " shape: (1, 1, 1, 2)\n", - " (0): TorchTensorParameter(output-shape: (1, 1, 1, 2))\n", + " shape: (1, 1, 2)\n", + " (0): TorchTensorParameter(output-shape: (1, 1, 2))\n", " )\n", " )\n", " (1): TorchCategoricalLayer(\n", - " folds: 1 channels: 1 variables: 1 output-units: 1\n", + " folds: 1 variables: 1 output-units: 1\n", " input-shape: (1, 1, -1, 1)\n", " output-shape: (1, -1, 1)\n", " (probs): TorchParameter(\n", - " shape: (1, 1, 1, 2)\n", - " (0): TorchTensorParameter(output-shape: (1, 1, 1, 2))\n", + " shape: (1, 1, 2)\n", + " (0): TorchTensorParameter(output-shape: (1, 1, 2))\n", " )\n", " )\n", " (2): TorchCategoricalLayer(\n", - " folds: 1 channels: 1 variables: 1 output-units: 1\n", + " folds: 1 variables: 1 output-units: 1\n", " input-shape: (1, 1, -1, 1)\n", " output-shape: (1, -1, 1)\n", " (probs): TorchParameter(\n", - " shape: (1, 1, 1, 2)\n", - " (0): TorchTensorParameter(output-shape: (1, 1, 1, 2))\n", + " shape: (1, 1, 2)\n", + " (0): TorchTensorParameter(output-shape: (1, 1, 2))\n", " )\n", " )\n", " (3): TorchSumLayer(\n", @@ -324,7 +324,7 @@ "\n", "Hence, to check if $\\{a, b, c \\}$ is a model we can evaluate the circuit on the 
tensor $[1.0, 1.0, 1.0]$.\n", "\n", - "Note that `cirkit`'s circuit expects the input to be shaped as `(batch size, number of channels, number of inputs)`. We will shape the `torch` tensor accordingly." + "Note that `cirkit`'s circuit expects the input to be shaped as `(batch size, number of variables)`. We will shape the `torch` tensor accordingly." ] }, { @@ -347,7 +347,7 @@ "source": [ "import torch\n", "\n", - "compiled_circuit(torch.tensor([1.0, 1.0, 1.0]).reshape(1, 1, -1))" + "compiled_circuit(torch.tensor([1.0, 1.0, 1.0]).reshape(1, -1))" ] }, { @@ -376,7 +376,7 @@ } ], "source": [ - "compiled_circuit(torch.tensor([0.0, 1.0, 1.0]).reshape(1, 1, -1))" + "compiled_circuit(torch.tensor([0.0, 1.0, 1.0]).reshape(1, -1))" ] }, { @@ -477,7 +477,7 @@ } ], "source": [ - "compiled_circuit(torch.tensor([1.0, 0.0, 1.0]).reshape(1, 1, -1))" + "compiled_circuit(torch.tensor([1.0, 0.0, 1.0]).reshape(1, -1))" ] }, { @@ -498,7 +498,7 @@ } ], "source": [ - "smooth_compiled_circuit(torch.tensor([1.0, 0.0, 1.0]).reshape(1, 1, -1))" + "smooth_compiled_circuit(torch.tensor([1.0, 0.0, 1.0]).reshape(1, -1))" ] }, { @@ -605,7 +605,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 11, @@ -737,7 +737,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 12, @@ -818,148 +818,148 @@ "\n", "\n", - "\n", + "\n", "\n", "%3\n", - "\n", - "\n", + "\n", + "\n", "\n", - "140162778867856\n", - "\n", - "0\n", + "140131062451216\n", + "\n", + "\n", "\n", - "\n", - "\n", - "140162777187280\n", - "\n", - "\n", + "\n", + "\n", + "140131062449584\n", + "\n", + "+\n", "\n", - "\n", - "\n", - "140162778867856->140162777187280\n", - "\n", - "\n", + "\n", + "\n", + "140131062451216->140131062449584\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "140162777186512\n", - "\n", - "¬2\n", + "140131062451456\n", + "\n", + "¬1\n", "\n", - "\n", - "\n", - "140162777186752\n", - "\n", - "+\n", - "\n", - "\n", - "\n", - "140162777186512->140162777186752\n", - "\n", - "\n", + "\n", + "\n", 
+ "140131062451456->140131062451216\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "140162777186656\n", - "\n", - "\n", + "140131062451264\n", + "\n", + "+\n", "\n", - "\n", - "\n", - "140162777187232\n", - "\n", - "+\n", - "\n", - "\n", - "\n", - "140162777186656->140162777187232\n", - "\n", - "\n", + "\n", + "\n", + "140131062451264->140131062451216\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "140162778868096\n", - "\n", - "1\n", + "140131062453808\n", + "\n", + "\n", "\n", - "\n", - "\n", - "140162778868096->140162777186656\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140162778868192\n", - "\n", - "2\n", - "\n", - "\n", - "\n", - "140162778868192->140162777186656\n", - "\n", - "\n", + "\n", + "\n", + "140131062450544\n", + "\n", + "+\n", "\n", - "\n", - "\n", - "140162778868192->140162777186752\n", - "\n", - "\n", + "\n", + "\n", + "140131062453808->140131062450544\n", + "\n", + "\n", "\n", - "\n", - "\n", - "140162777187712\n", - "\n", - "+\n", + "\n", + "\n", + "140131062451360\n", + "\n", + "0\n", "\n", - "\n", + "\n", "\n", - "140162777187280->140162777187712\n", - "\n", - "\n", + "140131062451360->140131062453808\n", + "\n", + "\n", "\n", - "\n", - "\n", - "140162777187232->140162777187280\n", - "\n", - "\n", + "\n", + "\n", + "140131062449584->140131062453808\n", + "\n", + "\n", "\n", - "\n", - "\n", - "140162777186800\n", - "\n", - "\n", + "\n", + "\n", + "140131062450832\n", + "\n", + "2\n", "\n", - "\n", + "\n", "\n", - "140162777186800->140162777187232\n", - "\n", - "\n", + "140131062450832->140131062451264\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "140162778862576\n", - "\n", - "¬1\n", + "140131062452464\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "140131062450832->140131062452464\n", + "\n", + "\n", "\n", - "\n", + "\n", + "\n", + "140131062453088\n", + "\n", + "¬2\n", + "\n", + "\n", "\n", - "140162778862576->140162777186800\n", - "\n", - "\n", + "140131062453088->140131062451264\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"140131062449824\n", + "\n", + "1\n", "\n", - "\n", + "\n", "\n", - "140162777186752->140162777186800\n", - "\n", - "\n", + "140131062449824->140131062452464\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "140131062452464->140131062449584\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 14, @@ -1049,7 +1049,7 @@ "# execute the query\n", "# note that the input to the circuit here is not important, since we will marginalize\n", "# over all variables\n", - "marginal_query(torch.tensor([0, 0, 0]).reshape(1, 1, -1), integrate_vars=vars_to_marginalize)" + "marginal_query(torch.tensor([0, 0, 0]).reshape(1, -1), integrate_vars=vars_to_marginalize)" ] }, { @@ -1084,7 +1084,7 @@ "# integrate over b and c\n", "vars_to_marginalize = Scope([1, 2])\n", "marginal_query = IntegrateQuery(alpha_sdd_compiled_circuit)\n", - "marginal_query(torch.tensor([0, 0, 0]).reshape(1, 1, -1), integrate_vars=vars_to_marginalize)" + "marginal_query(torch.tensor([0, 0, 0]).reshape(1, -1), integrate_vars=vars_to_marginalize)" ] }, { @@ -1105,7 +1105,7 @@ } ], "source": [ - "marginal_query(torch.tensor([0, 1, 0]).reshape(1, 1, -1), integrate_vars=vars_to_marginalize)" + "marginal_query(torch.tensor([0, 1, 0]).reshape(1, -1), integrate_vars=vars_to_marginalize)" ] }, { @@ -1126,7 +1126,7 @@ } ], "source": [ - "marginal_query(torch.tensor([0, 0, 1]).reshape(1, 1, -1), integrate_vars=vars_to_marginalize)" + "marginal_query(torch.tensor([0, 0, 1]).reshape(1, -1), integrate_vars=vars_to_marginalize)" ] }, { @@ -1147,7 +1147,7 @@ } ], "source": [ - "marginal_query(torch.tensor([0, 1, 1]).reshape(1, 1, -1), integrate_vars=vars_to_marginalize)" + "marginal_query(torch.tensor([0, 1, 1]).reshape(1, -1), integrate_vars=vars_to_marginalize)" ] }, { diff --git a/notebooks/region-graphs-and-parametrisation.ipynb b/notebooks/region-graphs-and-parametrisation.ipynb index 9c528f3d..a26836b5 100644 --- a/notebooks/region-graphs-and-parametrisation.ipynb +++ 
b/notebooks/region-graphs-and-parametrisation.ipynb @@ -98,7 +98,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -355,11 +355,10 @@ "\n", "def define_circuit_from_rg(rg: RegionGraph, sum_prod_layer: str = 'cp') -> Circuit:\n", " # Here is where Overparameterization comes in\n", - " input_factory = lambda scope, y, z: CategoricalLayer(\n", + " input_factory = lambda scope, num_units: CategoricalLayer(\n", " scope=scope,\n", " num_categories=PIXEL_RANGE+1,\n", - " num_channels=1, # These are grayscale images\n", - " num_output_units=NUM_INPUT_UNITS # Overparameterization\n", + " num_output_units=num_units # Overparameterization\n", " )\n", "\n", " # We need to specify how to parameterize the sum layers\n", @@ -378,6 +377,7 @@ " input_factory=input_factory,\n", " sum_weight_factory=sum_weight_factory,\n", " nary_sum_weight_factory=nary_sum_weight_factory,\n", + " num_input_units=NUM_INPUT_UNITS,\n", " num_sum_units=NUM_SUM_UNITS,\n", " sum_product=sum_prod_layer\n", " )\n", @@ -429,7 +429,7 @@ "from cirkit.templates.region_graph import RandomBinaryTree\n", "# Note that the random binary tree works on flat inputs (i.e. 
vectors)\n", "# We therefore compute the number of random variables needed (one per pixel value)\n", - "img_shape = example_image.shape[1:]\n", + "img_shape = example_image.shape\n", "n = np.prod(img_shape)\n", "\n", "# We can also specify depth and number of repetitions\n", @@ -656,11 +656,11 @@ "\n", "Training circuit with region graph \"quad-graph + cp\"\n", "Step 200: Average NLL: 2492.162\n", - "Step 400: Average NLL: 895.924\n", - "Step 600: Average NLL: 785.733\n", - "Step 800: Average NLL: 749.979\n", - "Step 1000: Average NLL: 729.827\n", - "Average test LL: 711.582\n", + "Step 400: Average NLL: 895.923\n", + "Step 600: Average NLL: 785.726\n", + "Step 800: Average NLL: 749.989\n", + "Step 1000: Average NLL: 729.839\n", + "Average test LL: 711.595\n", "Bits per dimension: 1.309\n", "\n", "Training circuit with region graph \"random-binary-tree + cp.T\"\n", @@ -675,10 +675,10 @@ "Training circuit with region graph \"random-binary-tree + Tucker\"\n", "Step 200: Average NLL: 2769.754\n", "Step 400: Average NLL: 1086.759\n", - "Step 600: Average NLL: 930.984\n", - "Step 800: Average NLL: 913.837\n", - "Step 1000: Average NLL: 907.995\n", - "Average test LL: 899.663\n", + "Step 600: Average NLL: 930.981\n", + "Step 800: Average NLL: 913.792\n", + "Step 1000: Average NLL: 907.813\n", + "Average test LL: 899.674\n", "Bits per dimension: 1.656\n", "\n", "Training circuit with region graph \"quad-tree-2 + cp.T\"\n", @@ -692,30 +692,30 @@ "\n", "Training circuit with region graph \"quad-tree-2 + Tucker\"\n", "Step 200: Average NLL: 2794.737\n", - "Step 400: Average NLL: 1010.180\n", - "Step 600: Average NLL: 807.477\n", - "Step 800: Average NLL: 763.874\n", - "Step 1000: Average NLL: 740.645\n", - "Average test LL: 720.697\n", - "Bits per dimension: 1.326\n", + "Step 400: Average NLL: 1010.181\n", + "Step 600: Average NLL: 807.422\n", + "Step 800: Average NLL: 763.685\n", + "Step 1000: Average NLL: 740.025\n", + "Average test LL: 719.982\n", + "Bits per dimension: 
1.325\n", "\n", "Training circuit with region graph \"quad-graph + cp.T\"\n", "Step 200: Average NLL: 2553.771\n", - "Step 400: Average NLL: 912.874\n", + "Step 400: Average NLL: 912.875\n", "Step 600: Average NLL: 794.444\n", - "Step 800: Average NLL: 756.515\n", - "Step 1000: Average NLL: 731.716\n", - "Average test LL: 717.640\n", + "Step 800: Average NLL: 756.514\n", + "Step 1000: Average NLL: 731.693\n", + "Average test LL: 717.628\n", "Bits per dimension: 1.321\n", "\n", "Training circuit with region graph \"quad-graph + Tucker\"\n", - "Step 200: Average NLL: 2769.996\n", - "Step 400: Average NLL: 1000.189\n", - "Step 600: Average NLL: 796.516\n", - "Step 800: Average NLL: 753.390\n", - "Step 1000: Average NLL: 730.825\n", - "Average test LL: 713.031\n", - "Bits per dimension: 1.312\n" + "Step 200: Average NLL: 2769.997\n", + "Step 400: Average NLL: 1000.182\n", + "Step 600: Average NLL: 796.553\n", + "Step 800: Average NLL: 753.499\n", + "Step 1000: Average NLL: 731.018\n", + "Average test LL: 713.253\n", + "Bits per dimension: 1.313\n" ] } ], @@ -761,10 +761,9 @@ " \n", " for epoch_idx in range(num_epochs):\n", " for i, (batch, _) in enumerate(train_dataloader):\n", - " # The circuit expects an input of shape (batch_dim, num_channels, num_variables),\n", - " # so we unsqueeze a dimension for the channel.\n", + " # The circuit expects an input of shape (batch_dim, num_variables)\n", " BS = batch.shape[0]\n", - " batch = batch.view(BS, 1, -1).to(device)\n", + " batch = batch.view(BS, -1).to(device)\n", " \n", " # Compute the log-likelihoods of the batch, by evaluating the circuit\n", " log_likelihoods = circuit(batch)\n", @@ -790,10 +789,9 @@ " test_lls = 0.0\n", " \n", " for batch, _ in test_dataloader:\n", - " # The circuit expects an input of shape (batch_dim, num_channels, num_variables),\n", - " # so we unsqueeze a dimension for the channel.\n", + " # The circuit expects an input of shape (batch_dim, num_variables)\n", " BS = batch.shape[0]\n", - " batch 
= batch.view(BS, 1, -1).to(device)\n", + " batch = batch.view(BS, -1).to(device)\n", " \n", " # Compute the log-likelihoods of the batch\n", " log_likelihoods = circuit(batch)\n", @@ -864,17 +862,17 @@ "
\n", " quad-graph\n", " 25,657,730\n", - " 711.582\n", + " 711.595\n", " 1.309\n", - " 729.827\n", + " 729.839\n", " cp\n", "
\n", "
\n", " quad-graph\n", " 421,306,626\n", - " 713.031\n", - " 1.312\n", - " 730.825\n", + " 713.253\n", + " 1.313\n", + " 731.018\n", " Tucker\n", "
\n", "
\n", @@ -888,17 +886,17 @@ "
\n", " quad-graph\n", " 19,259,778\n", - " 717.640\n", + " 717.628\n", " 1.321\n", - " 731.716\n", + " 731.693\n", " cp.T\n", "
\n", "
\n", " quad-tree-2\n", " 217,845,760\n", - " 720.697\n", - " 1.326\n", - " 740.645\n", + " 719.982\n", + " 1.325\n", + " 740.025\n", " Tucker\n", "
\n", "
\n", @@ -912,9 +910,9 @@ "
\n", " random-binary-tree\n", " 217,845,760\n", - " 899.663\n", + " 899.674\n", " 1.656\n", - " 907.995\n", + " 907.813\n", " Tucker\n", "
\n", "
\n", @@ -939,24 +937,24 @@ ], "text/plain": [ " # trainable parameters test loss \\\n", - "quad-graph 25,657,730 711.582 \n", - "quad-graph 421,306,626 713.031 \n", + "quad-graph 25,657,730 711.595 \n", + "quad-graph 421,306,626 713.253 \n", "quad-tree-2 19,259,456 715.767 \n", - "quad-graph 19,259,778 717.640 \n", - "quad-tree-2 217,845,760 720.697 \n", + "quad-graph 19,259,778 717.628 \n", + "quad-tree-2 217,845,760 719.982 \n", "quad-tree-2 16,048,192 724.647 \n", - "random-binary-tree 217,845,760 899.663 \n", + "random-binary-tree 217,845,760 899.674 \n", "random-binary-tree 19,259,456 912.605 \n", "random-binary-tree 16,048,192 915.956 \n", "\n", " test bits per dimension train loss (min) \\\n", - "quad-graph 1.309 729.827 \n", - "quad-graph 1.312 730.825 \n", + "quad-graph 1.309 729.839 \n", + "quad-graph 1.313 731.018 \n", "quad-tree-2 1.317 734.433 \n", - "quad-graph 1.321 731.716 \n", - "quad-tree-2 1.326 740.645 \n", + "quad-graph 1.321 731.693 \n", + "quad-tree-2 1.325 740.025 \n", "quad-tree-2 1.333 740.717 \n", - "random-binary-tree 1.656 907.995 \n", + "random-binary-tree 1.656 907.813 \n", "random-binary-tree 1.679 917.369 \n", "random-binary-tree 1.686 919.980 \n", "\n", diff --git a/notebooks/sum-of-squares-circuits.ipynb b/notebooks/sum-of-squares-circuits.ipynb index 53ad2267..69160da9 100644 --- a/notebooks/sum-of-squares-circuits.ipynb +++ b/notebooks/sum-of-squares-circuits.ipynb @@ -337,7 +337,7 @@ "torch.cuda.manual_seed(42)\n", "\n", "# Set the torch device to use\n", - "device = torch.device('cuda')\n", + "device = torch.device('cuda:2')\n", "\n", "# Load the MNIST data set and data loaders\n", "transform = transforms.Compose([\n", @@ -439,17 +439,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "Step 300: Average NLL: 1280.626\n", - "Step 600: Average NLL: 761.767\n", - "Step 900: Average NLL: 713.440\n", - "Step 1200: Average NLL: 687.036\n", - "Step 1500: Average NLL: 670.705\n", - "Step 1800: Average NLL: 661.506\n", - 
"Step 2100: Average NLL: 655.144\n", - "Step 2400: Average NLL: 647.572\n", - "Step 2700: Average NLL: 643.742\n", - "Step 3000: Average NLL: 642.090\n", - "Step 3300: Average NLL: 639.604\n" + "Step 300: Average NLL: 1280.521\n", + "Step 600: Average NLL: 760.516\n", + "Step 900: Average NLL: 712.537\n", + "Step 1200: Average NLL: 686.120\n", + "Step 1500: Average NLL: 669.953\n", + "Step 1800: Average NLL: 660.865\n", + "Step 2100: Average NLL: 654.583\n", + "Step 2400: Average NLL: 647.114\n", + "Step 2700: Average NLL: 643.236\n", + "Step 3000: Average NLL: 641.632\n", + "Step 3300: Average NLL: 639.239\n" ] } ], @@ -464,9 +464,8 @@ "\n", "for epoch_idx in range(num_epochs):\n", " for i, (batch, _) in enumerate(train_dataloader):\n", - " # The circuit expects an input of shape (batch_dim, num_channels, num_variables),\n", - " # so we unsqueeze a dimension for the channel.\n", - " batch = batch.to(device).unsqueeze(dim=1)\n", + " # The circuit expects an input of shape (batch_dim, num_variables)\n", + " batch = batch.to(device)\n", "\n", " # -------- Computation of the negated log-likelihoods loss -------- #\n", " # Compute the logarithm of the squared scores of the batch, by evaluating the circuit\n", @@ -513,7 +512,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Average test LL: -680.126\n", + "Average test LL: -680.173\n", "Bits per dimension: 1.252\n" ] } @@ -527,7 +526,7 @@ "\n", " test_lls = 0.0\n", " for batch, _ in test_dataloader:\n", - " batch = batch.to(device).unsqueeze(dim=1)\n", + " batch = batch.to(device)\n", "\n", " # -------- Compute the log-likelihoods of hte unseen samples -------- #\n", " # Compute the logarithm of the squared scores of the batch, by evaluating the circuit\n", diff --git a/pyproject.toml b/pyproject.toml index fd6bef13..b13a2fa7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,8 @@ notebooks = [ "matplotlib", "scikit-learn", "pandas", - "h5py" + "h5py", + "PySDD", ] 
################################################################################ diff --git a/tests/backend/torch/test_compile_circuit.py b/tests/backend/torch/test_compile_circuit.py index 0f6065ef..7b4483d1 100644 --- a/tests/backend/torch/test_compile_circuit.py +++ b/tests/backend/torch/test_compile_circuit.py @@ -13,12 +13,10 @@ from cirkit.backend.torch.layers.input import TorchCategoricalLayer from cirkit.backend.torch.semiring import Semiring, SumProductSemiring from cirkit.pipeline import PipelineContext -from cirkit.symbolic.initializers import DirichletInitializer from cirkit.symbolic.layers import CategoricalLayer, HadamardLayer, SumLayer -from cirkit.symbolic.parameters import Parameter, TensorParameter from cirkit.templates.region_graph import QuadGraph -from cirkit.utils.scope import Scope from tests.floats import isclose +from tests.symbolic.test_from_region_graph import categorical_layer_factory from tests.symbolic.test_utils import ( build_monotonic_bivariate_gaussian_hadamard_dense_pc, build_monotonic_structured_categorical_cpt_pc, @@ -33,10 +31,8 @@ def check_discrete_ground_truth( gt_outputs: dict[tuple[int, ...], float], gt_partition_func: float, ): - worlds = torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables))).unsqueeze( - dim=-2 - ) - assert worlds.shape == (2**tc.num_variables, 1, tc.num_variables) + worlds = torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables))) + assert worlds.shape == (2**tc.num_variables, tc.num_variables) tc_outputs = tc(worlds) assert tc_outputs.shape == (worlds.shape[0], 1, 1) @@ -63,7 +59,7 @@ def check_continuous_ground_truth( gt_partition_func: float, ): for x, y in gt_outputs.items(): - sample = torch.Tensor(x).unsqueeze(dim=0).unsqueeze(dim=-2) + sample = torch.Tensor(x).unsqueeze(dim=0) tc_output = tc(sample) assert isclose( tc_output, semiring.map_from(torch.tensor(y), SumProductSemiring) @@ -71,30 +67,14 @@ def check_continuous_ground_truth( # Test the integral of the 
circuit (using a quadrature rule) assert isclose(int_tc(), semiring.map_from(torch.tensor(gt_partition_func), SumProductSemiring)) - df = lambda y, x: torch.exp(tc(torch.Tensor([[[x, y]]]))).squeeze() + df = lambda y, x: torch.exp(tc(torch.Tensor([[x, y]]))).squeeze() int_a, int_b = -np.inf, np.inf ig, err = integrate.dblquad(df, int_a, int_b, int_a, int_b, epsabs=1e-5, epsrel=1e-5) assert isclose(ig, gt_partition_func) -def categorical_layer_factory( - scope: Scope, num_units: int, num_channels: int, *, num_categories: int = 2 -) -> CategoricalLayer: - return CategoricalLayer( - scope, - num_units, - num_channels, - num_categories=num_categories, - probs=Parameter.from_input( - TensorParameter( - num_units, num_channels, num_categories, initializer=DirichletInitializer() - ) - ), - ) - - @pytest.mark.parametrize("fold,optimize", itertools.product([False, True], [False, True])) -def test_circuit_parameters(fold: bool, optimize: bool): +def test_compile_circuit_parameters(fold: bool, optimize: bool): compiler = TorchCompiler(fold=fold) sc = build_multivariate_monotonic_structured_cpt_pc() tc: TorchCircuit = compiler.compile(sc) @@ -133,7 +113,7 @@ def test_compile_monotonic_structured_gaussian_pc(): def test_compile_unoptimized_monotonic_circuit_qg_3x3_cp(): - rg = QuadGraph((3, 3)) + rg = QuadGraph((1, 3, 3)) sc = rg.build_circuit( num_input_units=8, num_sum_units=8, @@ -188,7 +168,7 @@ def test_compile_unoptimized_monotonic_circuit_qg_3x3_cp(): scopes = set() for n1, n2 in zip(nodes_sc[:9], nodes_c[:9]): assert isinstance(n1, CategoricalLayer) - assert isinstance(n2, TorchCategoricalLayer) and n2.probs._nodes[0].shape == (8, 1, 2) + assert isinstance(n2, TorchCategoricalLayer) and n2.probs._nodes[0].shape == (8, 2) scopes.add(tuple(sc.layer_scope(n1))) assert input_scopes == scopes diff --git a/tests/backend/torch/test_compile_circuit_operators.py b/tests/backend/torch/test_compile_circuit_operators.py index d56a0ce2..610772a6 100644 --- 
a/tests/backend/torch/test_compile_circuit_operators.py +++ b/tests/backend/torch/test_compile_circuit_operators.py @@ -7,15 +7,17 @@ import torch from scipy import integrate -import cirkit.symbolic.functional as SF from cirkit.backend.torch.circuits import TorchCircuit, TorchConstantCircuit from cirkit.backend.torch.compiler import TorchCompiler from cirkit.backend.torch.layers.input import TorchEvidenceLayer from cirkit.backend.torch.semiring import SumProductSemiring +from cirkit.symbolic import functional as SF from cirkit.symbolic.layers import PolynomialLayer +from cirkit.utils.scope import Scope from tests.floats import allclose, isclose from tests.symbolic.test_utils import ( build_bivariate_monotonic_structured_cpt_pc, + build_monotonic_bivariate_gaussian_hadamard_dense_pc, build_monotonic_structured_categorical_cpt_pc, build_multivariate_monotonic_structured_cpt_pc, ) @@ -92,9 +94,7 @@ def test_compile_product_integrate_pc_categorical( assert 0.0 < z.item() < 1.0 elif semiring == "lse-sum": assert -np.inf < z.item() < 0.0 - worlds = torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables))).unsqueeze( - dim=-2 - ) + worlds = torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables))) scores = tc(worlds) assert scores.shape == (2**tc.num_variables, 1, 1) scores = scores.squeeze() @@ -125,7 +125,7 @@ def test_compile_product_integrate_pc_gaussian(): # Test the products of the circuits evaluated over _some_ possible assignments xs = torch.linspace(-5, 5, steps=16) ys = torch.linspace(-5, 5, steps=16) - points = torch.stack(torch.meshgrid(xs, ys, indexing="xy"), dim=1).view(-1, 1, 2) + points = torch.stack(torch.meshgrid(xs, ys, indexing="xy"), dim=1).view(-1, 2) scores = tc(points) scores = scores.squeeze() each_tc_scores = torch.stack([tci(points).squeeze() for tci in tcs], dim=0) @@ -135,7 +135,7 @@ def test_compile_product_integrate_pc_gaussian(): z = int_tc() assert z.shape == (1, 1) z = z.squeeze() - df = lambda y, x: 
torch.exp(tc(torch.Tensor([[[x, y]]]))).squeeze() + df = lambda y, x: torch.exp(tc(torch.Tensor([[x, y]]))).squeeze() int_a, int_b = -np.inf, np.inf ig, err = integrate.dblquad(df, int_a, int_b, int_a, int_b, epsabs=1e-5, epsrel=1e-5) assert isclose(ig, torch.exp(z).item()) @@ -168,9 +168,8 @@ def test_compile_product_pc_polynomial( .new_tensor( # degp1**D should be able to determine the coeffs. list(itertools.product(range(degp1), repeat=num_variables)) # type: ignore[misc] ) - .unsqueeze(dim=-2) .requires_grad_() - ) # shape (B, C=1, D=num_variables). + ) # shape (B, D=num_variables). zs = torch.stack([tci(inputs) for tci in tcs], dim=0) # shape num_prod * (B, num_out=1, num_cls=1). @@ -206,28 +205,93 @@ def test_compile_differentiate_pc_polynomial(semiring: str, fold: bool, optimize tc: TorchCircuit = compiler.get_compiled_circuit(sc) assert isinstance(tc, TorchCircuit) - inputs = ( - torch.tensor([[0.0] * num_variables, range(num_variables)]) # type: ignore[misc] - .unsqueeze(dim=-2) - .requires_grad_() - ) # shape (B=2, C=1, D=num_variables). + inputs = torch.tensor( + [[0.0] * num_variables, range(num_variables)] + ).requires_grad_() # type: ignore[misc] # shape (B=2, D=num_variables). with torch.enable_grad(): output = tc(inputs) assert output.shape == (2, 1, 1) # shape (B=2, num_out=1, num_cls=1). (grad_autodiff,) = torch.autograd.grad( output, inputs, torch.ones_like(output) - ) # shape (B=2, C=1, D=num_variables). + ) # shape (B=2, D=num_variables). grad = diff_tc(inputs) - assert grad.shape == (2, num_variables + 1, 1) # shape (B=2, num_out=1*(D*C+1), num_cls=1). - # shape (B=2, num_out=D, num_cls=1) -> (B=2, C=1, D=num_variables). - grad = grad[:, :-1, :].movedim(1, 2) + assert grad.shape == (2, num_variables + 1, 1) # shape (B=2, num_out=1*(D*1), num_cls=1). + # shape (B=2, num_out=D, num_cls=1) -> (B=2, D=num_variables). + grad = grad[:, :-1].squeeze(dim=2) # TODO: what if num_cls!=1? 
if semiring == "sum-product": assert allclose(grad, grad_autodiff) elif semiring == "complex-lse-sum": # NOTE: grad = log ∂ C; grad_autodiff = ∂ log C = ∂ C / C = ∂ C / exp(output) - assert allclose(torch.exp(grad), grad_autodiff * torch.exp(output)) + assert allclose(torch.exp(grad), grad_autodiff * torch.exp(output.squeeze(dim=1))) else: assert False + + +@pytest.mark.parametrize( + "semiring,fold,optimize", + itertools.product(["lse-sum", "sum-product"], [False, True], [False, True]), +) +def test_compile_marginalize_monotonic_pc_categorical(semiring: str, fold: bool, optimize: bool): + compiler = TorchCompiler(semiring=semiring, fold=fold, optimize=optimize) + sc, gt_outputs, gt_partition_func = build_monotonic_structured_categorical_cpt_pc( + return_ground_truth=True + ) + + mar_sc = SF.integrate(sc, scope=Scope([4])) + mar_tc: TorchCircuit = compiler.compile(mar_sc) + assert isinstance(mar_tc, TorchCircuit) + tc: TorchCircuit = compiler.get_compiled_circuit(sc) + assert isinstance(tc, TorchCircuit) + + worlds = torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables))) + scores = tc(worlds) + assert scores.shape == (2**tc.num_variables, 1, 1) + scores = scores.squeeze() + + mar_worlds = torch.cat( + [ + torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables - 1))), + torch.zeros(2 ** (tc.num_variables - 1), dtype=torch.int64).unsqueeze(dim=-1), + ], + dim=1, + ) + mar_scores = mar_tc(mar_worlds) + assert mar_scores.shape == (2 ** (tc.num_variables - 1), 1, 1) + mar_scores = mar_scores.squeeze() + assert allclose(compiler.semiring.sum(scores.view(-1, 2), dim=1), mar_scores) + + for x, y in gt_outputs["mar"].items(): + idx = int("".join(map(str, filter(lambda z: z != None, x))), base=2) + assert isclose( + mar_scores[idx], compiler.semiring.map_from(torch.tensor(y), SumProductSemiring) + ), f"Input: {x}" + + +def test_compile_marginalize_monotonic_pc_gaussian(): + compiler = TorchCompiler(fold=True, optimize=True, semiring="lse-sum") + 
sc, gt_outputs, gt_partition_func = build_monotonic_bivariate_gaussian_hadamard_dense_pc( + return_ground_truth=True + ) + + mar_sc = SF.integrate(sc, scope=Scope([1])) + mar_tc: TorchCircuit = compiler.compile(mar_sc) + assert isinstance(mar_tc, TorchCircuit) + tc: TorchCircuit = compiler.get_compiled_circuit(sc) + assert isinstance(tc, TorchCircuit) + + for x, y in gt_outputs["mar"].items(): + x = tuple(0.0 if z is None else z for z in x) + sample = torch.Tensor(x).unsqueeze(dim=0) + tc_output = mar_tc(sample) + assert isclose( + tc_output, compiler.semiring.map_from(torch.tensor(y), SumProductSemiring) + ), f"Input: {x}" + + # Test the integral of the marginal circuit (using a quadrature rule) + df = lambda x: torch.exp(mar_tc(torch.Tensor([[x, 0.0]]))).squeeze() + int_a, int_b = -np.inf, np.inf + ig, err = integrate.quad(df, int_a, int_b) + assert isclose(ig, gt_partition_func) diff --git a/tests/backend/torch/test_compile_initializer.py b/tests/backend/torch/test_compile_initializer.py index e3584742..60cd9f47 100644 --- a/tests/backend/torch/test_compile_initializer.py +++ b/tests/backend/torch/test_compile_initializer.py @@ -5,7 +5,7 @@ from cirkit.symbolic.initializers import ConstantTensorInitializer -def test_constant_tensor_initializer(): +def test_compile_initializer_constant_tensor(): compiler = TorchCompiler() array = np.arange(10) symbolic_initializer = ConstantTensorInitializer(array) diff --git a/tests/backend/torch/test_compile_marginalization.py b/tests/backend/torch/test_compile_marginalization.py deleted file mode 100644 index c4eb43a9..00000000 --- a/tests/backend/torch/test_compile_marginalization.py +++ /dev/null @@ -1,90 +0,0 @@ -import itertools - -import numpy as np -import pytest -import torch -from scipy import integrate - -import cirkit.symbolic.functional as SF -from cirkit.backend.torch.circuits import TorchCircuit -from cirkit.backend.torch.compiler import TorchCompiler -from cirkit.backend.torch.semiring import SumProductSemiring 
-from cirkit.utils.scope import Scope -from tests.floats import allclose, isclose -from tests.symbolic.test_utils import ( - build_monotonic_bivariate_gaussian_hadamard_dense_pc, - build_monotonic_structured_categorical_cpt_pc, -) - - -@pytest.mark.parametrize( - "semiring,fold,optimize", - itertools.product(["lse-sum", "sum-product"], [False, True], [False, True]), -) -def test_marginalize_monotonic_pc_categorical(semiring: str, fold: bool, optimize: bool): - compiler = TorchCompiler(semiring=semiring, fold=fold, optimize=optimize) - sc, gt_outputs, gt_partition_func = build_monotonic_structured_categorical_cpt_pc( - return_ground_truth=True - ) - - mar_sc = SF.integrate(sc, scope=Scope([4])) - mar_tc: TorchCircuit = compiler.compile(mar_sc) - assert isinstance(mar_tc, TorchCircuit) - tc: TorchCircuit = compiler.get_compiled_circuit(sc) - assert isinstance(tc, TorchCircuit) - - worlds = torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables))).unsqueeze( - dim=-2 - ) - scores = tc(worlds) - assert scores.shape == (2**tc.num_variables, 1, 1) - scores = scores.squeeze() - - mar_worlds = torch.cat( - [ - torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables - 1))).unsqueeze( - dim=-2 - ), - torch.zeros(2 ** (tc.num_variables - 1), dtype=torch.int64) - .unsqueeze(dim=-1) - .unsqueeze(dim=-1), - ], - dim=2, - ) - mar_scores = mar_tc(mar_worlds) - assert mar_scores.shape == (2 ** (tc.num_variables - 1), 1, 1) - mar_scores = mar_scores.squeeze() - assert allclose(compiler.semiring.sum(scores.view(-1, 2), dim=1), mar_scores) - - for x, y in gt_outputs["mar"].items(): - idx = int("".join(map(str, filter(lambda z: z != None, x))), base=2) - assert isclose( - mar_scores[idx], compiler.semiring.map_from(torch.tensor(y), SumProductSemiring) - ), f"Input: {x}" - - -def test_marginalize_monotonic_pc_gaussian(): - compiler = TorchCompiler(fold=True, optimize=True, semiring="lse-sum") - sc, gt_outputs, gt_partition_func = 
build_monotonic_bivariate_gaussian_hadamard_dense_pc( - return_ground_truth=True - ) - - mar_sc = SF.integrate(sc, scope=Scope([1])) - mar_tc: TorchCircuit = compiler.compile(mar_sc) - assert isinstance(mar_tc, TorchCircuit) - tc: TorchCircuit = compiler.get_compiled_circuit(sc) - assert isinstance(tc, TorchCircuit) - - for x, y in gt_outputs["mar"].items(): - x = tuple(0.0 if z is None else z for z in x) - sample = torch.Tensor(x).unsqueeze(dim=0).unsqueeze(dim=-2) - tc_output = mar_tc(sample) - assert isclose( - tc_output, compiler.semiring.map_from(torch.tensor(y), SumProductSemiring) - ), f"Input: {x}" - - # Test the integral of the marginal circuit (using a quadrature rule) - df = lambda x: torch.exp(mar_tc(torch.Tensor([[[x, 0.0]]]))).squeeze() - int_a, int_b = -np.inf, np.inf - ig, err = integrate.quad(df, int_a, int_b) - assert isclose(ig, gt_partition_func) diff --git a/tests/backend/torch/test_queries/test_integration.py b/tests/backend/torch/test_queries/test_integration.py index 07cfee02..46769ecb 100644 --- a/tests/backend/torch/test_queries/test_integration.py +++ b/tests/backend/torch/test_queries/test_integration.py @@ -32,14 +32,10 @@ def test_query_marginalize_monotonic_pc_categorical(semiring: str, fold: bool, o mar_worlds = torch.cat( [ - torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables - 1))).unsqueeze( - dim=-2 - ), - torch.zeros(2 ** (tc.num_variables - 1), dtype=torch.int64) - .unsqueeze(dim=-1) - .unsqueeze(dim=-1), + torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables - 1))), + torch.zeros(2 ** (tc.num_variables - 1), dtype=torch.int64).unsqueeze(dim=-1), ], - dim=2, + dim=1, ) mar_scores1 = mar_tc(mar_worlds) mar_query = IntegrateQuery(tc) @@ -52,7 +48,7 @@ def test_query_marginalize_monotonic_pc_categorical(semiring: str, fold: bool, o "semiring,fold,optimize,input_tensor", itertools.product(["lse-sum", "sum-product"], [False, True], [False, True], [False, True]), ) -def 
test_batch_query_marginalize_monotonic_pc_categorical( +def test_query_marginalize_match_monotonic_pc_categorical( semiring: str, fold: bool, optimize: bool, input_tensor: bool ): # Check using a mask with batching works @@ -66,7 +62,7 @@ def test_batch_query_marginalize_monotonic_pc_categorical( tc: TorchCircuit = compiler.compile(sc) # The marginal has been computed for (1, 0, 1, 1, None) -- so marginalising var 4. - inputs = torch.tensor([[[1, 0, 1, 1, 1], [1, 0, 1, 1, 1]]], dtype=torch.int64).view(2, 1, 5) + inputs = torch.tensor([[1, 0, 1, 1, 1], [1, 0, 1, 1, 1]], dtype=torch.int64) mar_query = IntegrateQuery(tc) if input_tensor: @@ -96,7 +92,7 @@ def test_batch_query_marginalize_monotonic_pc_categorical( "semiring,fold,optimize,input_tensor", itertools.product(["lse-sum", "sum-product"], [False, True], [False, True], [False, True]), ) -def test_batch_broadcast_query_marginalize_monotonic_pc_categorical( +def test_query_marginalize_batch_broadcast_monotonic_pc_categorical( semiring: str, fold: bool, optimize: bool, input_tensor: bool ): # Check that passing a single mask results in broadcasting @@ -110,7 +106,7 @@ def test_batch_broadcast_query_marginalize_monotonic_pc_categorical( tc: TorchCircuit = compiler.compile(sc) # The marginal has been computed for (1, 0, 1, 1, None) -- so marginalising var 4. 
- inputs = torch.tensor([[[1, 0, 1, 1, 0], [1, 0, 1, 1, 1]]], dtype=torch.int64).view(2, 1, 5) + inputs = torch.tensor([[1, 0, 1, 1, 0], [1, 0, 1, 1, 1]], dtype=torch.int64) mar_query = IntegrateQuery(tc) if input_tensor: @@ -137,7 +133,7 @@ def test_batch_broadcast_query_marginalize_monotonic_pc_categorical( "input_tensor", itertools.product([False, True]), ) -def test_batch_fails_on_out_of_scope( +def test_query_marginalize_batch_fails_on_out_of_scope( input_tensor, semiring="sum-product", fold=True, optimize=True ): # Check that passing a single mask results in broadcasting @@ -151,7 +147,7 @@ def test_batch_fails_on_out_of_scope( tc: TorchCircuit = compiler.compile(sc) # The marginal has been computed for (1, 0, 1, 1, None) -- so marginalising var 4. - inputs = torch.tensor([[[1, 0, 1, 1, 0], [1, 0, 1, 1, 1]]], dtype=torch.int64).view(2, 1, 5) + inputs = torch.tensor([[1, 0, 1, 1, 0], [1, 0, 1, 1, 1]], dtype=torch.int64) mar_query = IntegrateQuery(tc) if input_tensor: @@ -177,7 +173,7 @@ def test_batch_fails_on_out_of_scope( "input_tensor", itertools.product([False, True]), ) -def test_batch_fails_on_wrong_batch_size( +def test_marginalize_batch_fails_on_wrong_batch_size( input_tensor, semiring="sum-product", fold=True, optimize=True ): # Check that passing a single mask results in broadcasting @@ -191,7 +187,7 @@ def test_batch_fails_on_wrong_batch_size( tc: TorchCircuit = compiler.compile(sc) # The marginal has been computed for (1, 0, 1, 1, None) -- so marginalising var 4. 
- inputs = torch.tensor([[[1, 0, 1, 1, 0], [1, 0, 1, 1, 1]]], dtype=torch.int64).view(2, 1, 5) + inputs = torch.tensor([[1, 0, 1, 1, 0], [1, 0, 1, 1, 1]], dtype=torch.int64) mar_query = IntegrateQuery(tc) if input_tensor: @@ -217,7 +213,9 @@ def test_batch_fails_on_wrong_batch_size( mar_scores = mar_query(inputs, integrate_vars=mask) -def test_batch_fails_on_wrong_tensor_dtype(semiring="sum-product", fold=True, optimize=True): +def test_marginalize_batch_fails_on_wrong_tensor_dtype( + semiring="sum-product", fold=True, optimize=True +): # Check that passing a single mask results in broadcasting compiler = TorchCompiler(semiring=semiring, fold=fold, optimize=optimize) # The following function computes a circuit where we have computed the @@ -229,7 +227,7 @@ def test_batch_fails_on_wrong_tensor_dtype(semiring="sum-product", fold=True, op tc: TorchCircuit = compiler.compile(sc) # The marginal has been computed for (1, 0, 1, 1, None) -- so marginalising var 4. - inputs = torch.tensor([[[1, 0, 1, 1, 0], [1, 0, 1, 1, 1]]], dtype=torch.int64).view(2, 1, 5) + inputs = torch.tensor([[1, 0, 1, 1, 0], [1, 0, 1, 1, 1]], dtype=torch.int64) mar_query = IntegrateQuery(tc) diff --git a/tests/backend/torch/test_queries/test_sampling.py b/tests/backend/torch/test_queries/test_sampling.py index a2a907a9..2b0e6678 100644 --- a/tests/backend/torch/test_queries/test_sampling.py +++ b/tests/backend/torch/test_queries/test_sampling.py @@ -7,14 +7,15 @@ from cirkit.backend.torch.compiler import TorchCompiler from cirkit.backend.torch.queries import SamplingQuery from tests.floats import allclose -from tests.symbolic.test_utils import build_multivariate_monotonic_structured_cpt_pc +from tests.symbolic.test_utils import build_multivariate_monotonic_structured_cpt_pc, \ + build_bivariate_monotonic_structured_cpt_pc @pytest.mark.parametrize( "fold,optimize", itertools.product([False, True], [False, True]), ) -def test_quary_unconditional_sampling(fold: bool, optimize: bool): +def 
test_query_unconditional_sampling(fold: bool, optimize: bool): compiler = TorchCompiler(semiring="lse-sum", fold=fold, optimize=optimize) sc = build_multivariate_monotonic_structured_cpt_pc( num_units=2, input_layer="bernoulli", parameterize=True, normalized=True @@ -22,10 +23,8 @@ def test_quary_unconditional_sampling(fold: bool, optimize: bool): tc: TorchCircuit = compiler.compile(sc) # Compute the probabilities - worlds = torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables))).unsqueeze( - dim=-2 - ) - assert worlds.shape == (2**tc.num_variables, 1, tc.num_variables) + worlds = torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables))) + assert worlds.shape == (2**tc.num_variables, tc.num_variables) tc_outputs = tc(worlds) assert tc_outputs.shape == (2**tc.num_variables, 1, 1) assert torch.all(torch.isfinite(tc_outputs)) @@ -35,9 +34,9 @@ def test_quary_unconditional_sampling(fold: bool, optimize: bool): # Sample data points unconditionally num_samples = 1_000_000 query = SamplingQuery(tc) - # samples: (num_samples, C, D) + # samples: (num_samples, D) samples, _ = query(num_samples=num_samples) - assert samples.shape == (num_samples, 1, tc.num_variables) + assert samples.shape == (num_samples, tc.num_variables) samples = samples.squeeze(dim=1) # Map samples to indices of the probabilities computed above @@ -47,4 +46,4 @@ def test_quary_unconditional_sampling(fold: bool, optimize: bool): # Compute ratios and compare with the probabilities _, counts = torch.unique(samples_idx, return_counts=True) ratios = counts / num_samples - assert allclose(ratios, probs, atol=1e-3) + assert allclose(ratios, probs, rtol=3e-2) diff --git a/tests/backend/torch/test_semiring.py b/tests/backend/torch/test_semiring.py index 038c71ea..7ee89a5a 100644 --- a/tests/backend/torch/test_semiring.py +++ b/tests/backend/torch/test_semiring.py @@ -6,7 +6,7 @@ from tests.floats import allclose -def test_complex_safelog_derivative(): +def 
test_semiring_complex_safelog_derivative(): torch.set_grad_enabled(True) z = torch.randn(512, dtype=torch.complex128) z.requires_grad = True @@ -36,7 +36,7 @@ def test_complex_safelog_derivative(): assert torch.all(torch.isfinite(z.grad)) -def test_complex_lse_sum_semiring(): +def test_semiring_complex_lse_sum_semiring(): torch.set_default_dtype(torch.float32) x = torch.tensor( diff --git a/tests/backend/torch/test_serialization.py b/tests/backend/torch/test_serialization.py index 0869df1b..9f2b48d2 100644 --- a/tests/backend/torch/test_serialization.py +++ b/tests/backend/torch/test_serialization.py @@ -14,14 +14,12 @@ "semiring,fold,optimize", itertools.product(["sum-product", "lse-sum"], [False, True], [False, True]), ) -def test_save_load_statedict(semiring: str, fold: bool, optimize: bool): +def test_serialization_save_load_statedict(semiring: str, fold: bool, optimize: bool): compiler = TorchCompiler(semiring=semiring, fold=fold, optimize=optimize) sc = build_monotonic_structured_categorical_cpt_pc(return_ground_truth=False) tc: TorchCircuit = compiler.compile(sc) - worlds = torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables))).unsqueeze( - dim=-2 - ) + worlds = torch.tensor(list(itertools.product([0, 1], repeat=tc.num_variables))) scores = tc(worlds) assert scores.shape == (len(worlds), 1, 1) diff --git a/tests/symbolic/test_circuit_operators.py b/tests/symbolic/test_circuit_operators.py index 3c567200..ea9cdf4d 100644 --- a/tests/symbolic/test_circuit_operators.py +++ b/tests/symbolic/test_circuit_operators.py @@ -5,7 +5,6 @@ import pytest import cirkit.symbolic.functional as SF -from cirkit.pipeline import PipelineContext from cirkit.symbolic.circuit import are_compatible from cirkit.symbolic.layers import ( CategoricalLayer, @@ -31,7 +30,7 @@ "num_units,input_layer", itertools.product([1, 3], ["bernoulli", "gaussian"]), ) -def test_evidence_circuit(num_units: int, input_layer: str): +def test_symop_evidence_circuit(num_units: int, 
input_layer: str): sc = build_multivariate_monotonic_structured_cpt_pc( num_units=num_units, input_layer=input_layer ) @@ -50,7 +49,7 @@ def test_evidence_circuit(num_units: int, input_layer: str): assert isinstance( evi_layer.layer, CategoricalLayer if input_layer == "bernoulli" else GaussianLayer ) - assert evi_layer.observation.shape == (1, 1) + assert evi_layer.observation.shape == (1,) assert len(list(evi_sc.inner_layers)) == len(list(sc.inner_layers)) @@ -58,7 +57,7 @@ def test_evidence_circuit(num_units: int, input_layer: str): "num_units,input_layer", itertools.product([1, 3], ["bernoulli", "gaussian"]), ) -def test_integrate_circuit(num_units: int, input_layer: str): +def test_symop_integrate_circuit(num_units: int, input_layer: str): sc = build_multivariate_monotonic_structured_cpt_pc( num_units=num_units, input_layer=input_layer ) @@ -76,7 +75,7 @@ def test_integrate_circuit(num_units: int, input_layer: str): "num_units,input_layer", itertools.product([1, 3], ["bernoulli", "gaussian", "polynomial"]), ) -def test_multiply_circuits(num_units: int, input_layer: str): +def test_symop_multiply_circuits(num_units: int, input_layer: str): sc1 = build_multivariate_monotonic_structured_cpt_pc( num_units=num_units, input_layer=input_layer ) @@ -116,7 +115,7 @@ def test_multiply_circuits(num_units: int, input_layer: str): "num_units,input_layer", itertools.product([1, 3], ["bernoulli", "gaussian"]), ) -def test_multiply_evidence_circuit(num_units: int, input_layer: str): +def test_symop_multiply_evidence_circuit(num_units: int, input_layer: str): sc1 = build_multivariate_monotonic_structured_cpt_pc( num_units=num_units, input_layer=input_layer ) @@ -167,7 +166,7 @@ def test_multiply_evidence_circuit(num_units: int, input_layer: str): "num_units,input_layer", itertools.product([1, 3], ["bernoulli", "embedding", "gaussian"]), ) -def test_multiply_integrate_circuits(num_units: int, input_layer: str): +def test_symop_multiply_integrate_circuits(num_units: int, 
input_layer: str): sc1 = build_multivariate_monotonic_structured_cpt_pc( num_units=num_units, input_layer=input_layer ) @@ -198,7 +197,7 @@ def test_multiply_integrate_circuits(num_units: int, input_layer: str): "num_units,input_layer", itertools.product([1, 3], ["bernoulli", "gaussian", "polynomial"]), ) -def test_conjugate_circuit(num_units: int, input_layer: str): +def test_symop_conjugate_circuit(num_units: int, input_layer: str): sc1 = build_multivariate_monotonic_structured_cpt_pc( num_units=num_units, input_layer=input_layer ) @@ -230,7 +229,7 @@ def _batched(iterable: Iterable[_T_co], n: int) -> Iterable[tuple[_T_co, ...]]: @pytest.mark.parametrize("num_units", [1, 3]) -def test_differentiate_circuit(num_units: int) -> None: +def test_symop_differentiate_circuit(num_units: int) -> None: sc = build_multivariate_monotonic_structured_cpt_pc( num_units=num_units, input_layer="polynomial" ) diff --git a/tests/symbolic/test_from_region_graph.py b/tests/symbolic/test_from_region_graph.py index 35d5bce9..a6a25d6f 100644 --- a/tests/symbolic/test_from_region_graph.py +++ b/tests/symbolic/test_from_region_graph.py @@ -6,23 +6,20 @@ def categorical_layer_factory( - scope: Scope, num_units: int, num_channels: int, *, num_categories: int = 2 + scope: Scope, num_units: int, *, num_categories: int = 2 ) -> CategoricalLayer: return CategoricalLayer( scope, num_units, - num_channels, num_categories=num_categories, probs=Parameter.from_input( - TensorParameter( - num_units, num_channels, num_categories, initializer=DirichletInitializer() - ) + TensorParameter(num_units, num_categories, initializer=DirichletInitializer()) ), ) def test_build_circuit_qg_3x3_cp(): - rg = QuadGraph((3, 3)) + rg = QuadGraph((1, 3, 3)) sc = rg.build_circuit( num_input_units=3, num_sum_units=2, @@ -49,7 +46,7 @@ def test_build_circuit_qg_3x3_cp(): def test_build_circuit_qt4_3x3_cp(): - rg = QuadTree((3, 3), num_patch_splits=4) + rg = QuadTree((1, 3, 3), num_patch_splits=4) sc = rg.build_circuit( 
num_input_units=3, num_sum_units=2, diff --git a/tests/symbolic/test_utils.py b/tests/symbolic/test_utils.py index a5816a22..8b6ce80f 100644 --- a/tests/symbolic/test_utils.py +++ b/tests/symbolic/test_utils.py @@ -58,7 +58,6 @@ def build_bivariate_monotonic_structured_cpt_pc( (vid,): CategoricalLayer( Scope([vid]), num_output_units=num_units, - num_channels=1, num_categories=2, logits_factory=logits_factory, probs_factory=probs_factory, @@ -76,7 +75,6 @@ def build_bivariate_monotonic_structured_cpt_pc( (vid,): GaussianLayer( Scope([vid]), num_output_units=num_units, - num_channels=1, stddev_factory=stddev_factory, ) for vid in range(2) @@ -121,7 +119,6 @@ def build_bivariate_monotonic_structured_cpt_pc( # Build the symbolic circuit circuit = Circuit( - num_channels=1, layers=list(itertools.chain(input_layers.values(), [product_layer], dense_layers.values())), in_layers=in_layers, outputs=[dense_layers[(0, 1)]], @@ -164,7 +161,6 @@ def build_multivariate_monotonic_structured_cpt_pc( (vid,): CategoricalLayer( Scope([vid]), num_output_units=num_units, - num_channels=1, num_categories=2, logits_factory=logits_factory, probs_factory=probs_factory, @@ -176,7 +172,6 @@ def build_multivariate_monotonic_structured_cpt_pc( (vid,): EmbeddingLayer( Scope([vid]), num_output_units=num_units, - num_channels=1, num_states=2, ) for vid in range(5) @@ -192,7 +187,6 @@ def build_multivariate_monotonic_structured_cpt_pc( (vid,): GaussianLayer( Scope([vid]), num_output_units=num_units, - num_channels=1, stddev_factory=stddev_factory, ) for vid in range(5) @@ -208,7 +202,6 @@ def build_multivariate_monotonic_structured_cpt_pc( (vid,): PolynomialLayer( Scope([vid]), num_output_units=num_units, - num_channels=1, degree=2, # TODO: currently hard-coded coeff_factory=coeff_factory, ) @@ -261,7 +254,6 @@ def build_multivariate_monotonic_structured_cpt_pc( # Build the symbolic circuit circuit = Circuit( - num_channels=1, layers=list( itertools.chain(input_layers.values(), 
product_layers.values(), dense_layers.values()) ), @@ -281,11 +273,11 @@ def build_monotonic_structured_categorical_cpt_pc( ) -> Circuit | tuple[Circuit, dict[str, dict[tuple[int, ...], float]], float]: # The probabilities of Bernoulli layers bernoulli_probs: dict[tuple[int, ...], np.ndarray] = { - (0,): np.array([[[0.5, 0.5]], [[0.4, 0.6]]]), - (1,): np.array([[[0.2, 0.8]], [[0.3, 0.7]]]), - (2,): np.array([[[0.3, 0.7]], [[0.1, 0.9]]]), - (3,): np.array([[[0.5, 0.5]], [[0.5, 0.5]]]), - (4,): np.array([[[0.1, 0.9]], [[0.8, 0.2]]]), + (0,): np.array([[0.5, 0.5], [0.4, 0.6]]), + (1,): np.array([[0.2, 0.8], [0.3, 0.7]]), + (2,): np.array([[0.3, 0.7], [0.1, 0.9]]), + (3,): np.array([[0.5, 0.5], [0.5, 0.5]]), + (4,): np.array([[0.1, 0.9], [0.8, 0.2]]), } # The parameters of dense weights @@ -405,8 +397,8 @@ def build_monotonic_bivariate_gaussian_hadamard_dense_pc( ) -> Circuit | tuple[Circuit, dict[str, dict[tuple[int, ...], float]], float]: # The mean and standard deviations of Gaussian layers gaussian_mean_stddev: dict[tuple[int, ...], tuple[np.ndarray, np.ndarray]] = { - (0,): (np.array([[0.0], [0.5]]), np.array([[1.0], [0.5]])), - (1,): (np.array([[2.0], [-1.0]]), np.array([[1.5], [2.0]])), + (0,): (np.array([0.0, 0.5]), np.array([1.0, 0.5])), + (1,): (np.array([2.0, -1.0]), np.array([1.5, 2.0])), } # The parameters of dense weights diff --git a/tests/templates/region_graph/test_algorithms.py b/tests/templates/region_graph/test_algorithms.py index 71049f6f..d623a28e 100644 --- a/tests/templates/region_graph/test_algorithms.py +++ b/tests/templates/region_graph/test_algorithms.py @@ -1,5 +1,6 @@ import itertools +import numpy as np import pytest from cirkit.templates.region_graph import ( @@ -87,16 +88,19 @@ def test_rg_algorithm_random_binary_tree( @pytest.mark.parametrize( - "shape,num_patch_splits", itertools.product([(1, 1), (1, 3), (3, 1), (3, 3), (4, 4)], [2, 4]) + "shape,num_patch_splits", + itertools.product( + [(1, 1, 1), (1, 1, 3), (1, 3, 1), (1, 3, 3), (3, 
3, 3), (1, 4, 4), (3, 4, 4)], [2, 4] + ), ) def test_rg_algorithm_quad_tree(shape: tuple[int, int], num_patch_splits: int): - num_variables = shape[0] * shape[1] + num_variables = np.prod(shape) rg = QuadTree(shape, num_patch_splits=num_patch_splits) root: RegionNode (root,) = list(rg.outputs) assert isinstance(root, RegionNode) assert root.scope == Scope(range(num_variables)) - assert all(len(rgn.scope) == 1 for rgn in rg.inputs) + assert all(len(rgn.scope) == shape[0] for rgn in rg.inputs) assert all(len(rg.region_inputs(rgn)) == 1 for rgn in rg.inner_region_nodes) if num_patch_splits == 2: assert all(len(rg.partition_inputs(ptn)) == 2 for ptn in rg.partition_nodes) @@ -108,22 +112,27 @@ def test_rg_algorithm_quad_tree(shape: tuple[int, int], num_patch_splits: int): check_region_graph_save_load(rg) -@pytest.mark.parametrize("shape", [(1, 1), (1, 3), (3, 1), (3, 3), (4, 4)]) +@pytest.mark.parametrize( + "shape", [(1, 1, 1), (1, 1, 3), (1, 3, 1), (1, 3, 3), (3, 3, 3), (1, 4, 4), (3, 4, 4)] +) def test_rg_algorithm_quad_graph(shape: tuple[int, int]): - num_variables = shape[0] * shape[1] + num_variables = np.prod(shape) rg = QuadGraph(shape) root: RegionNode (root,) = list(rg.outputs) assert isinstance(root, RegionNode) assert root.scope == Scope(range(num_variables)) - assert all(len(rgn.scope) == 1 for rgn in rg.inputs) + assert all(len(rgn.scope) == shape[0] for rgn in rg.inputs) assert all(len(rg.region_inputs(rgn)) in [1, 2] for rgn in rg.inner_region_nodes) assert all(len(rg.partition_inputs(ptn)) in [2, 4] for ptn in rg.partition_nodes) check_region_graph_save_load(rg) @pytest.mark.parametrize( - "shape,delta", itertools.product([(1, 1), (3, 3), (4, 4)], [1, [1, 2], [[1, 3], [2, 4]]]) + "shape,delta", + itertools.product( + [(1, 1, 1), (1, 3, 3), (1, 4, 4), (3, 3, 3), (3, 4, 4)], [1, [1, 2], [[1, 3], [2, 4]]] + ), ) def test_rg_algorithm_poon_domingos( shape: tuple[int, int],