llm

`gpu_energy(model_active_parameter_count, output_token_count, gpu_energy_alpha, gpu_energy_beta, gpu_energy_stdev)`

Compute energy consumption of a single GPU.

Parameters:

Name	Type	Description	Default
`model_active_parameter_count`	`float`	Number of active parameters of the model (in billion).	required
`output_token_count`	`float`	Number of generated tokens.	required
`gpu_energy_alpha`	`float`	Alpha parameter of the GPU linear power consumption profile.	required
`gpu_energy_beta`	`float`	Beta parameter of the GPU linear power consumption profile.	required
`gpu_energy_stdev`	`float`	Standard deviation of the GPU linear power consumption profile.	required

Returns:

Type	Description
`ValueOrRange`	The 95% confidence interval of energy consumption of a single GPU in kWh.

Source code in ecologits/impacts/llm.py

@dag.asset
def gpu_energy(
        model_active_parameter_count: float,
        output_token_count: float,
        gpu_energy_alpha: float,
        gpu_energy_beta: float,
        gpu_energy_stdev: float
) -> ValueOrRange:
    """
    Estimate the energy drawn by a single GPU to generate the output tokens.

    The per-token energy follows a linear profile in the number of active
    parameters (`alpha * parameters + beta`). The interval bounds sit 1.96
    standard deviations below and above that mean, scaled by the number of
    generated tokens.

    Args:
        model_active_parameter_count: Number of active parameters of the model (in billion).
        output_token_count: Number of generated tokens.
        gpu_energy_alpha: Alpha (slope) parameter of the GPU linear power consumption profile.
        gpu_energy_beta: Beta (intercept) parameter of the GPU linear power consumption profile.
        gpu_energy_stdev: Standard deviation of the GPU linear power consumption profile.

    Returns:
        The 95% confidence interval of energy consumption of a single GPU in kWh.
        The lower bound is floored at zero.
    """
    per_token_mean = gpu_energy_alpha * model_active_parameter_count + gpu_energy_beta
    # Half-width of the interval for one token (1.96 standard deviations).
    margin = 1.96 * gpu_energy_stdev
    lower = output_token_count * (per_token_mean - margin)
    upper = output_token_count * (per_token_mean + margin)
    # A wide deviation can push the lower bound below zero; energy cannot be negative.
    return RangeValue(min=max(0, lower), max=upper)

`generation_latency(model_active_parameter_count, output_token_count, gpu_latency_alpha, gpu_latency_beta, gpu_latency_stdev, request_latency)`

Compute the token generation latency in seconds.

Parameters:

Name	Type	Description	Default
`model_active_parameter_count`	`float`	Number of active parameters of the model (in billion).	required
`output_token_count`	`float`	Number of generated tokens.	required
`gpu_latency_alpha`	`float`	Alpha parameter of the GPU linear latency profile.	required
`gpu_latency_beta`	`float`	Beta parameter of the GPU linear latency profile.	required
`gpu_latency_stdev`	`float`	Standard deviation of the GPU linear latency profile.	required
`request_latency`	`float`	Measured request latency (upper bound) in seconds.	required

Returns:

Type	Description
`ValueOrRange`	The token generation latency in seconds.

Source code in ecologits/impacts/llm.py

@dag.asset
def generation_latency(
        model_active_parameter_count: float,
        output_token_count: float,
        gpu_latency_alpha: float,
        gpu_latency_beta: float,
        gpu_latency_stdev: float,
        request_latency: float,
) -> ValueOrRange:
    """
    Estimate how long the token generation takes, in seconds.

    The per-token latency follows a linear profile in the number of active
    parameters (`alpha * parameters + beta`). The estimated interval spans 1.96
    standard deviations either side of that mean, scaled by the number of
    generated tokens, with the lower bound floored at zero.

    Args:
        model_active_parameter_count: Number of active parameters of the model (in billion).
        output_token_count: Number of generated tokens.
        gpu_latency_alpha: Alpha (slope) parameter of the GPU linear latency profile.
        gpu_latency_beta: Beta (intercept) parameter of the GPU linear latency profile.
        gpu_latency_stdev: Standard deviation of the GPU linear latency profile.
        request_latency: Measured request latency (upper bound) in seconds.

    Returns:
        The estimated latency interval when it compares below `request_latency`,
        otherwise `request_latency` itself.
    """
    per_token_mean = gpu_latency_alpha * model_active_parameter_count + gpu_latency_beta
    margin = 1.96 * gpu_latency_stdev
    lower = output_token_count * (per_token_mean - margin)
    upper = output_token_count * (per_token_mean + margin)
    estimate = RangeValue(min=max(0, lower), max=upper)
    # The measured latency caps the estimate.
    # NOTE(review): how a RangeValue compares with a float is defined by RangeValue itself — confirm there.
    return estimate if estimate < request_latency else request_latency

`model_required_memory(model_total_parameter_count, model_quantization_bits)`

Compute the required memory to load the model on GPU.

Parameters:

Name	Type	Description	Default
`model_total_parameter_count`	`float`	Number of parameters of the model (in billion).	required
`model_quantization_bits`	`int`	Number of bits used to represent the model weights.	required

Returns:

Type	Description
`float`	The amount of required GPU memory to load the model.

Source code in ecologits/impacts/llm.py

@dag.asset
def model_required_memory(
        model_total_parameter_count: float,
        model_quantization_bits: int,
) -> float:
    """
    Estimate the GPU memory needed to load the model.

    Args:
        model_total_parameter_count: Number of parameters of the model (in billion).
        model_quantization_bits: Number of bits used to represent the model weights.

    Returns:
        The amount of required GPU memory to load the model.
        NOTE(review): presumably in gigabytes (billions of parameters times
        bytes per parameter) — confirm against the unit used for `gpu_memory`.
    """
    # NOTE(review): the 1.2 multiplier looks like an overhead margin on top of the raw weights — confirm.
    padded_parameter_count = 1.2 * model_total_parameter_count
    # Dividing the bit width by 8 converts bits per weight into bytes per weight.
    return padded_parameter_count * model_quantization_bits / 8

`gpu_required_count(model_required_memory, gpu_memory)`

Compute the number of GPUs required to store the model.

Parameters:

Name	Type	Description	Default
`model_required_memory`	`float`	Required memory to load the model on GPU.	required
`gpu_memory`	`float`	Amount of memory available on a single GPU.	required

Returns:

Type	Description
`int`	The number of required GPUs to load the model.

Source code in ecologits/impacts/llm.py

@dag.asset
def gpu_required_count(
        model_required_memory: float,
        gpu_memory: float
) -> int:
    """
    Compute the number of GPUs required to store the model.

    Args:
        model_required_memory: Required memory to load the model on GPU.
        gpu_memory: Amount of memory available on a single GPU (same unit as
            `model_required_memory`).

    Returns:
        The number of required GPUs to load the model.
    """
    # A partially filled GPU still counts as a whole device, hence the rounding up.
    gpu_fraction = model_required_memory / gpu_memory
    return ceil(gpu_fraction)

`server_energy(generation_latency, server_power, server_gpu_count, gpu_required_count)`

Compute the energy consumption of the server.

Parameters:

Name	Type	Description	Default
`generation_latency`	`float`	Token generation latency in seconds.	required
`server_power`	`float`	Power consumption of the server in kW.	required
`server_gpu_count`	`int`	Number of available GPUs in the server.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required

Returns:

Type	Description
`float`	The energy consumption of the server (GPUs are not included) in kWh.

Source code in ecologits/impacts/llm.py

@dag.asset
def server_energy(
        generation_latency: ValueOrRange,
        server_power: float,
        server_gpu_count: int,
        gpu_required_count: int
) -> ValueOrRange:
    """
    Compute the energy consumption of the server.

    The server power is applied over the generation latency and weighted by the
    fraction of the server's GPUs that the model occupies.

    Args:
        generation_latency: Token generation latency in seconds. May be a single
            value or a range, as produced by the `generation_latency` asset.
        server_power: Power consumption of the server in kW.
        server_gpu_count: Number of available GPUs in the server.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The energy consumption of the server (GPUs are not included) in kWh.
        It is a range whenever `generation_latency` is a range.
    """
    # Seconds to hours, so that kW * h yields kWh.
    latency_hours = generation_latency / 3600
    # Only the share of the server matching the GPUs in use is attributed to the request.
    gpu_share = gpu_required_count / server_gpu_count
    return latency_hours * server_power * gpu_share

`request_energy(datacenter_pue, server_energy, gpu_required_count, gpu_energy)`

Compute the energy consumption of the request.

Parameters:

Name	Type	Description	Default
`datacenter_pue`	`float`	PUE of the datacenter.	required
`server_energy`	`float`	Energy consumption of the server in kWh.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required
`gpu_energy`	`ValueOrRange`	Energy consumption of a single GPU in kWh.	required

Returns:

Type	Description
`ValueOrRange`	The energy consumption of the request in kWh.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_energy(
        datacenter_pue: float,
        server_energy: ValueOrRange,
        gpu_required_count: int,
        gpu_energy: ValueOrRange
) -> ValueOrRange:
    """
    Compute the energy consumption of the request.

    The server energy and the energy of all required GPUs are summed, then
    scaled by the datacenter PUE.

    Args:
        datacenter_pue: PUE of the datacenter.
        server_energy: Energy consumption of the server in kWh. May be a single
            value or a range, since it derives from the generation latency.
        gpu_required_count: Number of required GPUs to load the model.
        gpu_energy: Energy consumption of a single GPU in kWh.

    Returns:
        The energy consumption of the request in kWh.
    """
    # Operand order is kept as-is: the range type handles the scalar-on-the-left products.
    all_gpus_energy = gpu_required_count * gpu_energy
    return datacenter_pue * (server_energy + all_gpus_energy)

`request_usage_gwp(request_energy, if_electricity_mix_gwp)`

Compute the Global Warming Potential (GWP) usage impact of the request.

Parameters:

Name	Type	Description	Default
`request_energy`	`ValueOrRange`	Energy consumption of the request in kWh.	required
`if_electricity_mix_gwp`	`float`	GWP impact factor of electricity consumption in kgCO2eq / kWh.	required

Returns:

Type	Description
`ValueOrRange`	The GWP usage impact of the request in kgCO2eq.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_usage_gwp(
        request_energy: ValueOrRange,
        if_electricity_mix_gwp: float
) -> ValueOrRange:
    """
    Convert the request energy into its usage-phase Global Warming Potential (GWP).

    Args:
        request_energy: Energy consumption of the request in kWh.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption in kgCO2eq / kWh.

    Returns:
        The GWP usage impact of the request in kgCO2eq.
    """
    # kWh * (kgCO2eq / kWh) -> kgCO2eq
    usage_gwp = request_energy * if_electricity_mix_gwp
    return usage_gwp

`request_usage_adpe(request_energy, if_electricity_mix_adpe)`

Compute the Abiotic Depletion Potential for Elements (ADPe) usage impact of the request.

Parameters:

Name	Type	Description	Default
`request_energy`	`ValueOrRange`	Energy consumption of the request in kWh.	required
`if_electricity_mix_adpe`	`float`	ADPe impact factor of electricity consumption in kgSbeq / kWh.	required

Returns:

Type	Description
`ValueOrRange`	The ADPe usage impact of the request in kgSbeq.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_usage_adpe(
        request_energy: ValueOrRange,
        if_electricity_mix_adpe: float
) -> ValueOrRange:
    """
    Convert the request energy into its usage-phase Abiotic Depletion Potential for Elements (ADPe).

    Args:
        request_energy: Energy consumption of the request in kWh.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption in kgSbeq / kWh.

    Returns:
        The ADPe usage impact of the request in kgSbeq.
    """
    # kWh * (kgSbeq / kWh) -> kgSbeq
    usage_adpe = request_energy * if_electricity_mix_adpe
    return usage_adpe

`request_usage_pe(request_energy, if_electricity_mix_pe)`

Compute the Primary Energy (PE) usage impact of the request.

Parameters:

Name	Type	Description	Default
`request_energy`	`ValueOrRange`	Energy consumption of the request in kWh.	required
`if_electricity_mix_pe`	`float`	PE impact factor of electricity consumption in MJ / kWh.	required

Returns:

Type	Description
`ValueOrRange`	The PE usage impact of the request in MJ.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_usage_pe(
        request_energy: ValueOrRange,
        if_electricity_mix_pe: float
) -> ValueOrRange:
    """
    Convert the request energy into its usage-phase Primary Energy (PE) impact.

    Args:
        request_energy: Energy consumption of the request in kWh.
        if_electricity_mix_pe: PE impact factor of electricity consumption in MJ / kWh.

    Returns:
        The PE usage impact of the request in MJ.
    """
    # kWh * (MJ / kWh) -> MJ
    usage_pe = request_energy * if_electricity_mix_pe
    return usage_pe

`server_gpu_embodied_gwp(server_embodied_gwp, server_gpu_count, gpu_embodied_gwp, gpu_required_count)`

Compute the Global Warming Potential (GWP) embodied impact of the server and the GPUs.

Parameters:

Name	Type	Description	Default
`server_embodied_gwp`	`float`	GWP embodied impact of the server in kgCO2eq.	required
`server_gpu_count`	`float`	Number of available GPUs in the server.	required
`gpu_embodied_gwp`	`float`	GWP embodied impact of a single GPU in kgCO2eq.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required

Returns:

Type	Description
`float`	The GWP embodied impact of the server and the GPUs in kgCO2eq.

Source code in ecologits/impacts/llm.py

@dag.asset
def server_gpu_embodied_gwp(
        server_embodied_gwp: float,
        server_gpu_count: int,
        gpu_embodied_gwp: float,
        gpu_required_count: int
) -> float:
    """
    Compute the Global Warming Potential (GWP) embodied impact of the server and the GPUs.

    The server impact is attributed in proportion to the GPUs in use, and the
    embodied impact of each required GPU is added on top.

    Args:
        server_embodied_gwp: GWP embodied impact of the server in kgCO2eq.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_gwp: GWP embodied impact of a single GPU in kgCO2eq.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The GWP embodied impact of the server and the GPUs in kgCO2eq.
    """
    # Share of the server chassis attributed to the GPUs actually used.
    server_part = (gpu_required_count / server_gpu_count) * server_embodied_gwp
    gpus_part = gpu_required_count * gpu_embodied_gwp
    return server_part + gpus_part

`server_gpu_embodied_adpe(server_embodied_adpe, server_gpu_count, gpu_embodied_adpe, gpu_required_count)`

Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the server and the GPUs.

Parameters:

Name	Type	Description	Default
`server_embodied_adpe`	`float`	ADPe embodied impact of the server in kgSbeq.	required
`server_gpu_count`	`float`	Number of available GPUs in the server.	required
`gpu_embodied_adpe`	`float`	ADPe embodied impact of a single GPU in kgSbeq.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required

Returns:

Type	Description
`float`	The ADPe embodied impact of the server and the GPUs in kgSbeq.

Source code in ecologits/impacts/llm.py

@dag.asset
def server_gpu_embodied_adpe(
        server_embodied_adpe: float,
        server_gpu_count: int,
        gpu_embodied_adpe: float,
        gpu_required_count: int
) -> float:
    """
    Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the server and the GPUs.

    The server impact is attributed in proportion to the GPUs in use, and the
    embodied impact of each required GPU is added on top.

    Args:
        server_embodied_adpe: ADPe embodied impact of the server in kgSbeq.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_adpe: ADPe embodied impact of a single GPU in kgSbeq.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The ADPe embodied impact of the server and the GPUs in kgSbeq.
    """
    # Share of the server chassis attributed to the GPUs actually used.
    server_part = (gpu_required_count / server_gpu_count) * server_embodied_adpe
    gpus_part = gpu_required_count * gpu_embodied_adpe
    return server_part + gpus_part

`server_gpu_embodied_pe(server_embodied_pe, server_gpu_count, gpu_embodied_pe, gpu_required_count)`

Compute the Primary Energy (PE) embodied impact of the server and the GPUs.

Parameters:

Name	Type	Description	Default
`server_embodied_pe`	`float`	PE embodied impact of the server in MJ.	required
`server_gpu_count`	`float`	Number of available GPUs in the server.	required
`gpu_embodied_pe`	`float`	PE embodied impact of a single GPU in MJ.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required

Returns:

Type	Description
`float`	The PE embodied impact of the server and the GPUs in MJ.

Source code in ecologits/impacts/llm.py

@dag.asset
def server_gpu_embodied_pe(
        server_embodied_pe: float,
        server_gpu_count: int,
        gpu_embodied_pe: float,
        gpu_required_count: int
) -> float:
    """
    Compute the Primary Energy (PE) embodied impact of the server and the GPUs.

    The server impact is attributed in proportion to the GPUs in use, and the
    embodied impact of each required GPU is added on top.

    Args:
        server_embodied_pe: PE embodied impact of the server in MJ.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_pe: PE embodied impact of a single GPU in MJ.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The PE embodied impact of the server and the GPUs in MJ.
    """
    # Share of the server chassis attributed to the GPUs actually used.
    server_part = (gpu_required_count / server_gpu_count) * server_embodied_pe
    gpus_part = gpu_required_count * gpu_embodied_pe
    return server_part + gpus_part

`request_embodied_gwp(server_gpu_embodied_gwp, server_lifetime, generation_latency)`

Compute the Global Warming Potential (GWP) embodied impact of the request.

Parameters:

Name	Type	Description	Default
`server_gpu_embodied_gwp`	`float`	GWP embodied impact of the server and the GPUs in kgCO2eq.	required
`server_lifetime`	`float`	Lifetime duration of the server in seconds.	required
`generation_latency`	`ValueOrRange`	Token generation latency in seconds.	required

Returns:

Type	Description
`ValueOrRange`	The GWP embodied impact of the request in kgCO2eq.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_embodied_gwp(
        server_gpu_embodied_gwp: float,
        server_lifetime: float,
        generation_latency: ValueOrRange
) -> ValueOrRange:
    """
    Attribute a share of the hardware's embodied Global Warming Potential (GWP) to the request.

    The share is the fraction of the server lifetime taken up by the generation latency.

    Args:
        server_gpu_embodied_gwp: GWP embodied impact of the server and the GPUs in kgCO2eq.
        server_lifetime: Lifetime duration of the server in seconds.
        generation_latency: Token generation latency in seconds.

    Returns:
        The GWP embodied impact of the request in kgCO2eq.
    """
    lifetime_fraction = generation_latency / server_lifetime
    return lifetime_fraction * server_gpu_embodied_gwp

`request_embodied_adpe(server_gpu_embodied_adpe, server_lifetime, generation_latency)`

Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the request.

Parameters:

Name	Type	Description	Default
`server_gpu_embodied_adpe`	`float`	ADPe embodied impact of the server and the GPUs in kgSbeq.	required
`server_lifetime`	`float`	Lifetime duration of the server in seconds.	required
`generation_latency`	`ValueOrRange`	Token generation latency in seconds.	required

Returns:

Type	Description
`ValueOrRange`	The ADPe embodied impact of the request in kgSbeq.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_embodied_adpe(
        server_gpu_embodied_adpe: float,
        server_lifetime: float,
        generation_latency: ValueOrRange
) -> ValueOrRange:
    """
    Attribute a share of the hardware's embodied Abiotic Depletion Potential for Elements (ADPe) to the request.

    The share is the fraction of the server lifetime taken up by the generation latency.

    Args:
        server_gpu_embodied_adpe: ADPe embodied impact of the server and the GPUs in kgSbeq.
        server_lifetime: Lifetime duration of the server in seconds.
        generation_latency: Token generation latency in seconds.

    Returns:
        The ADPe embodied impact of the request in kgSbeq.
    """
    lifetime_fraction = generation_latency / server_lifetime
    return lifetime_fraction * server_gpu_embodied_adpe

`request_embodied_pe(server_gpu_embodied_pe, server_lifetime, generation_latency)`

Compute the Primary Energy (PE) embodied impact of the request.

Parameters:

Name	Type	Description	Default
`server_gpu_embodied_pe`	`float`	PE embodied impact of the server and the GPUs in MJ.	required
`server_lifetime`	`float`	Lifetime duration of the server in seconds.	required
`generation_latency`	`ValueOrRange`	Token generation latency in seconds.	required

Returns:

Type	Description
`ValueOrRange`	The PE embodied impact of the request in MJ.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_embodied_pe(
        server_gpu_embodied_pe: float,
        server_lifetime: float,
        generation_latency: ValueOrRange
) -> ValueOrRange:
    """
    Attribute a share of the hardware's embodied Primary Energy (PE) impact to the request.

    The share is the fraction of the server lifetime taken up by the generation latency.

    Args:
        server_gpu_embodied_pe: PE embodied impact of the server and the GPUs in MJ.
        server_lifetime: Lifetime duration of the server in seconds.
        generation_latency: Token generation latency in seconds.

    Returns:
        The PE embodied impact of the request in MJ.
    """
    lifetime_fraction = generation_latency / server_lifetime
    return lifetime_fraction * server_gpu_embodied_pe

compute_llm_impacts_dag(model_active_parameter_count, model_total_parameter_count, output_token_count, request_latency, if_electricity_mix_adpe, if_electricity_mix_pe, if_electricity_mix_gwp, model_quantization_bits=MODEL_QUANTIZATION_BITS, gpu_energy_alpha=GPU_ENERGY_ALPHA, gpu_energy_beta=GPU_ENERGY_BETA, gpu_energy_stdev=GPU_ENERGY_STDEV, gpu_latency_alpha=GPU_LATENCY_ALPHA, gpu_latency_beta=GPU_LATENCY_BETA, gpu_latency_stdev=GPU_LATENCY_STDEV, gpu_memory=GPU_MEMORY, gpu_embodied_gwp=GPU_EMBODIED_IMPACT_GWP, gpu_embodied_adpe=GPU_EMBODIED_IMPACT_ADPE, gpu_embodied_pe=GPU_EMBODIED_IMPACT_PE, server_gpu_count=SERVER_GPUS, server_power=SERVER_POWER, server_embodied_gwp=SERVER_EMBODIED_IMPACT_GWP, server_embodied_adpe=SERVER_EMBODIED_IMPACT_ADPE, server_embodied_pe=SERVER_EMBODIED_IMPACT_PE, server_lifetime=HARDWARE_LIFESPAN, datacenter_pue=DATACENTER_PUE)

Compute the impacts DAG of an LLM generation request.

Parameters:

Name	Type	Description	Default
`model_active_parameter_count`	`ValueOrRange`	Number of active parameters of the model (in billion).	required
`model_total_parameter_count`	`ValueOrRange`	Number of parameters of the model (in billion).	required
`output_token_count`	`float`	Number of generated tokens.	required
`request_latency`	`float`	Measured request latency in seconds.	required
`if_electricity_mix_adpe`	`float`	ADPe impact factor of electricity consumption in kgSbeq / kWh (Antimony).	required
`if_electricity_mix_pe`	`float`	PE impact factor of electricity consumption in MJ / kWh.	required
`if_electricity_mix_gwp`	`float`	GWP impact factor of electricity consumption in kgCO2eq / kWh.	required
`model_quantization_bits`	`Optional[int]`	Number of bits used to represent the model weights.	`MODEL_QUANTIZATION_BITS`
`gpu_energy_alpha`	`Optional[float]`	Alpha parameter of the GPU linear power consumption profile.	`GPU_ENERGY_ALPHA`
`gpu_energy_beta`	`Optional[float]`	Beta parameter of the GPU linear power consumption profile.	`GPU_ENERGY_BETA`
`gpu_energy_stdev`	`Optional[float]`	Standard deviation of the GPU linear power consumption profile.	`GPU_ENERGY_STDEV`
`gpu_latency_alpha`	`Optional[float]`	Alpha parameter of the GPU linear latency profile.	`GPU_LATENCY_ALPHA`
`gpu_latency_beta`	`Optional[float]`	Beta parameter of the GPU linear latency profile.	`GPU_LATENCY_BETA`
`gpu_latency_stdev`	`Optional[float]`	Standard deviation of the GPU linear latency profile.	`GPU_LATENCY_STDEV`
`gpu_memory`	`Optional[float]`	Amount of memory available on a single GPU.	`GPU_MEMORY`
`gpu_embodied_gwp`	`Optional[float]`	GWP embodied impact of a single GPU.	`GPU_EMBODIED_IMPACT_GWP`
`gpu_embodied_adpe`	`Optional[float]`	ADPe embodied impact of a single GPU.	`GPU_EMBODIED_IMPACT_ADPE`
`gpu_embodied_pe`	`Optional[float]`	PE embodied impact of a single GPU.	`GPU_EMBODIED_IMPACT_PE`
`server_gpu_count`	`Optional[int]`	Number of available GPUs in the server.	`SERVER_GPUS`
`server_power`	`Optional[float]`	Power consumption of the server in kW.	`SERVER_POWER`
`server_embodied_gwp`	`Optional[float]`	GWP embodied impact of the server in kgCO2eq.	`SERVER_EMBODIED_IMPACT_GWP`
`server_embodied_adpe`	`Optional[float]`	ADPe embodied impact of the server in kgSbeq.	`SERVER_EMBODIED_IMPACT_ADPE`
`server_embodied_pe`	`Optional[float]`	PE embodied impact of the server in MJ.	`SERVER_EMBODIED_IMPACT_PE`
`server_lifetime`	`Optional[float]`	Lifetime duration of the server in seconds.	`HARDWARE_LIFESPAN`
`datacenter_pue`	`Optional[float]`	PUE of the datacenter.	`DATACENTER_PUE`

Returns:

Type	Description
`dict[str, ValueOrRange]`	The impacts dag with all intermediate states.

Source code in ecologits/impacts/llm.py

def compute_llm_impacts_dag(
        model_active_parameter_count: ValueOrRange,
        model_total_parameter_count: ValueOrRange,
        output_token_count: float,
        request_latency: float,
        if_electricity_mix_adpe: float,
        if_electricity_mix_pe: float,
        if_electricity_mix_gwp: float,
        model_quantization_bits: Optional[int] = MODEL_QUANTIZATION_BITS,
        gpu_energy_alpha: Optional[float] = GPU_ENERGY_ALPHA,
        gpu_energy_beta: Optional[float] = GPU_ENERGY_BETA,
        gpu_energy_stdev: Optional[float] = GPU_ENERGY_STDEV,
        gpu_latency_alpha: Optional[float] = GPU_LATENCY_ALPHA,
        gpu_latency_beta: Optional[float] = GPU_LATENCY_BETA,
        gpu_latency_stdev: Optional[float] = GPU_LATENCY_STDEV,
        gpu_memory: Optional[float] = GPU_MEMORY,
        gpu_embodied_gwp: Optional[float] = GPU_EMBODIED_IMPACT_GWP,
        gpu_embodied_adpe: Optional[float] = GPU_EMBODIED_IMPACT_ADPE,
        gpu_embodied_pe: Optional[float] = GPU_EMBODIED_IMPACT_PE,
        server_gpu_count: Optional[int] = SERVER_GPUS,
        server_power: Optional[float] = SERVER_POWER,
        server_embodied_gwp: Optional[float] = SERVER_EMBODIED_IMPACT_GWP,
        server_embodied_adpe: Optional[float] = SERVER_EMBODIED_IMPACT_ADPE,
        server_embodied_pe: Optional[float] = SERVER_EMBODIED_IMPACT_PE,
        server_lifetime: Optional[float] = HARDWARE_LIFESPAN,
        datacenter_pue: Optional[float] = DATACENTER_PUE,
) -> dict[str, ValueOrRange]:
    """
    Run the impacts DAG for one LLM generation request.

    Every argument is handed to `dag.execute` as a keyword input; the assets
    registered on the DAG are resolved from those inputs.

    Args:
        model_active_parameter_count: Number of active parameters of the model (in billion).
        model_total_parameter_count: Number of parameters of the model (in billion).
        output_token_count: Number of generated tokens.
        request_latency: Measured request latency in seconds.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption in kgSbeq / kWh (Antimony).
        if_electricity_mix_pe: PE impact factor of electricity consumption in MJ / kWh.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption in kgCO2eq / kWh.
        model_quantization_bits: Number of bits used to represent the model weights.
        gpu_energy_alpha: Alpha parameter of the GPU linear power consumption profile.
        gpu_energy_beta: Beta parameter of the GPU linear power consumption profile.
        gpu_energy_stdev: Standard deviation of the GPU linear power consumption profile.
        gpu_latency_alpha: Alpha parameter of the GPU linear latency profile.
        gpu_latency_beta: Beta parameter of the GPU linear latency profile.
        gpu_latency_stdev: Standard deviation of the GPU linear latency profile.
        gpu_memory: Amount of memory available on a single GPU.
        gpu_embodied_gwp: GWP embodied impact of a single GPU.
        gpu_embodied_adpe: ADPe embodied impact of a single GPU.
        gpu_embodied_pe: PE embodied impact of a single GPU.
        server_gpu_count: Number of available GPUs in the server.
        server_power: Power consumption of the server in kW.
        server_embodied_gwp: GWP embodied impact of the server in kgCO2eq.
        server_embodied_adpe: ADPe embodied impact of the server in kgSbeq.
        server_embodied_pe: PE embodied impact of the server in MJ.
        server_lifetime: Lifetime duration of the server in seconds.
        datacenter_pue: PUE of the datacenter.

    Returns:
        The impacts DAG with all intermediate states.
    """
    # Keys must match the parameter names of the DAG assets; the order mirrors the keyword call it replaces.
    dag_inputs = {
        "model_active_parameter_count": model_active_parameter_count,
        "model_total_parameter_count": model_total_parameter_count,
        "model_quantization_bits": model_quantization_bits,
        "output_token_count": output_token_count,
        "request_latency": request_latency,
        "if_electricity_mix_gwp": if_electricity_mix_gwp,
        "if_electricity_mix_adpe": if_electricity_mix_adpe,
        "if_electricity_mix_pe": if_electricity_mix_pe,
        "gpu_energy_alpha": gpu_energy_alpha,
        "gpu_energy_beta": gpu_energy_beta,
        "gpu_energy_stdev": gpu_energy_stdev,
        "gpu_latency_alpha": gpu_latency_alpha,
        "gpu_latency_beta": gpu_latency_beta,
        "gpu_latency_stdev": gpu_latency_stdev,
        "gpu_memory": gpu_memory,
        "gpu_embodied_gwp": gpu_embodied_gwp,
        "gpu_embodied_adpe": gpu_embodied_adpe,
        "gpu_embodied_pe": gpu_embodied_pe,
        "server_gpu_count": server_gpu_count,
        "server_power": server_power,
        "server_embodied_gwp": server_embodied_gwp,
        "server_embodied_adpe": server_embodied_adpe,
        "server_embodied_pe": server_embodied_pe,
        "server_lifetime": server_lifetime,
        "datacenter_pue": datacenter_pue,
    }
    return dag.execute(**dag_inputs)

`compute_llm_impacts(model_active_parameter_count, model_total_parameter_count, output_token_count, if_electricity_mix_adpe, if_electricity_mix_pe, if_electricity_mix_gwp, request_latency=None, **kwargs)`

Compute the impacts of an LLM generation request.

Parameters:

Name	Type	Description	Default
`model_active_parameter_count`	`ValueOrRange`	Number of active parameters of the model (in billion).	required
`model_total_parameter_count`	`ValueOrRange`	Number of total parameters of the model (in billion).	required
`output_token_count`	`float`	Number of generated tokens.	required
`if_electricity_mix_adpe`	`float`	ADPe impact factor of electricity consumption in kgSbeq / kWh (Antimony).	required
`if_electricity_mix_pe`	`float`	PE impact factor of electricity consumption in MJ / kWh.	required
`if_electricity_mix_gwp`	`float`	GWP impact factor of electricity consumption in kgCO2eq / kWh.	required
`request_latency`	`Optional[float]`	Measured request latency in seconds.	`None`
`**kwargs`	`Any`	Any other optional parameter.	`{}`

Returns:

Type	Description
`Impacts`	The impacts of an LLM generation request.

Source code in ecologits/impacts/llm.py

def compute_llm_impacts(
        model_active_parameter_count: ValueOrRange,
        model_total_parameter_count: ValueOrRange,
        output_token_count: float,
        if_electricity_mix_adpe: float,
        if_electricity_mix_pe: float,
        if_electricity_mix_gwp: float,
        request_latency: Optional[float] = None,
        **kwargs: Any
) -> Impacts:
    """
    Compute the impacts of an LLM generation request.

    When either parameter count is a `RangeValue`, the DAG is evaluated twice,
    once with the lower bounds and once with the upper bounds, and the two
    results are merged into ranges. Otherwise it is evaluated once.

    Args:
        model_active_parameter_count: Number of active parameters of the model (in billion).
        model_total_parameter_count: Number of total parameters of the model (in billion).
        output_token_count: Number of generated tokens.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption in kgSbeq / kWh (Antimony).
        if_electricity_mix_pe: PE impact factor of electricity consumption in MJ / kWh.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption in kgCO2eq / kWh.
        request_latency: Measured request latency in seconds. When omitted, an
            infinite latency is used in its place.
        **kwargs: Any other optional parameter, forwarded to `compute_llm_impacts_dag`.

    Returns:
        The impacts of an LLM generation request.
    """
    if request_latency is None:
        request_latency = math.inf

    active_is_range = isinstance(model_active_parameter_count, RangeValue)
    total_is_range = isinstance(model_total_parameter_count, RangeValue)

    if not (active_is_range or total_is_range):
        # Plain values on both sides: a single DAG evaluation is enough.
        active_params = [model_active_parameter_count]
        total_params = [model_total_parameter_count]
    else:
        # At least one range: evaluate at the (min, max) bounds, repeating a plain value for both runs.
        active_params = (
            [model_active_parameter_count.min, model_active_parameter_count.max]
            if active_is_range
            else [model_active_parameter_count, model_active_parameter_count]
        )
        total_params = (
            [model_total_parameter_count.min, model_total_parameter_count.max]
            if total_is_range
            else [model_total_parameter_count, model_total_parameter_count]
        )

    fields = ["request_energy", "request_usage_gwp", "request_usage_adpe", "request_usage_pe",
              "request_embodied_gwp", "request_embodied_adpe", "request_embodied_pe"]
    results: dict[str, Union[RangeValue, float, int]] = {}

    for act_param, tot_param in zip(active_params, total_params):
        run = compute_llm_impacts_dag(
            model_active_parameter_count=act_param,
            model_total_parameter_count=tot_param,
            output_token_count=output_token_count,
            request_latency=request_latency,
            if_electricity_mix_adpe=if_electricity_mix_adpe,
            if_electricity_mix_pe=if_electricity_mix_pe,
            if_electricity_mix_gwp=if_electricity_mix_gwp,
            **kwargs
        )
        for name in fields:
            if name not in results:
                # First (or only) run: keep the value as computed.
                results[name] = run[name]
                continue
            # Second run: the stored value gives the lower bound, the new one the upper bound.
            lower = results[name]
            upper = run[name]
            if isinstance(lower, RangeValue):
                lower = cast(Union[float, int], lower.min)
            if isinstance(upper, RangeValue):
                upper = cast(Union[float, int], upper.max)
            results[name] = RangeValue(min=lower, max=upper)

    energy = Energy(value=results["request_energy"])
    gwp_usage = GWP(value=results["request_usage_gwp"])
    adpe_usage = ADPe(value=results["request_usage_adpe"])
    pe_usage = PE(value=results["request_usage_pe"])
    gwp_embodied = GWP(value=results["request_embodied_gwp"])
    adpe_embodied = ADPe(value=results["request_embodied_adpe"])
    pe_embodied = PE(value=results["request_embodied_pe"])

    # Totals combine the usage and embodied phases for each criterion.
    total_gwp = gwp_usage + gwp_embodied
    total_adpe = adpe_usage + adpe_embodied
    total_pe = pe_usage + pe_embodied
    usage = Usage(energy=energy, gwp=gwp_usage, adpe=adpe_usage, pe=pe_usage)
    embodied = Embodied(gwp=gwp_embodied, adpe=adpe_embodied, pe=pe_embodied)
    return Impacts(
        energy=energy,
        gwp=total_gwp,
        adpe=total_adpe,
        pe=total_pe,
        usage=usage,
        embodied=embodied
    )