llm

`gpu_energy(model_active_parameter_count, output_token_count, batch_size, gpu_energy_alpha, gpu_energy_beta, gpu_energy_gamma)`

Compute energy consumption of a single GPU.

Parameters:

Name	Type	Description	Default
`model_active_parameter_count`	`float`	Number of active parameters of the model (in billion).	required
`output_token_count`	`float`	Number of generated tokens.	required
`batch_size`	`int`	Number of requests handled concurrently by the server.	required
`gpu_energy_alpha`	`float`	Alpha coefficient of the energy regression.	required
`gpu_energy_beta`	`float`	Beta coefficient of the energy regression.	required
`gpu_energy_gamma`	`float`	Gamma coefficient of the energy regression.	required

Returns:

Type	Description
`ValueOrRange`	The energy consumption of a single GPU in kWh.

Source code in ecologits/impacts/llm.py

@dag.asset
def gpu_energy(
        model_active_parameter_count: float,
        output_token_count: float,
        batch_size: int,
        gpu_energy_alpha: float,
        gpu_energy_beta: float,
        gpu_energy_gamma: float,
) -> ValueOrRange:
    """
    Estimate the energy a single GPU consumes to generate the output tokens.

    The per-token energy is modelled by the regression
    ``alpha * exp(beta * batch_size) * active_parameters + gamma``.

    Args:
        model_active_parameter_count: Number of active parameters of the model (in billion).
        output_token_count: Number of generated tokens.
        batch_size: Number of requests handled concurrently by the server.
        gpu_energy_alpha: Alpha coefficient of the energy regression.
        gpu_energy_beta: Beta coefficient of the energy regression.
        gpu_energy_gamma: Gamma coefficient of the energy regression.

    Returns:
        The energy consumption of a single GPU in kWh.
    """
    batch_factor = math.exp(gpu_energy_beta * batch_size)
    regression_per_token = (
        gpu_energy_alpha * batch_factor * model_active_parameter_count
        + gpu_energy_gamma
    )
    # The regression output is scaled down by 1000 to express it in kWh
    # (presumably it is fitted in Wh per token -- confirm against the model data).
    kwh_per_token = regression_per_token / 1000
    return output_token_count * kwh_per_token

`generation_latency(model_active_parameter_count, output_token_count, batch_size, latency_alpha, latency_beta, latency_gamma, request_latency)`

Compute the token generation latency in seconds.

Parameters:

Name	Type	Description	Default
`model_active_parameter_count`	`float`	Number of active parameters of the model (in billion).	required
`output_token_count`	`float`	Number of generated tokens.	required
`batch_size`	`int`	Number of requests handled concurrently by the server.	required
`latency_alpha`	`float`	Alpha coefficient of the latency regression.	required
`latency_beta`	`float`	Beta coefficient of the latency regression.	required
`latency_gamma`	`float`	Gamma coefficient of the latency regression.	required
`request_latency`	`float`	Measured request latency in seconds.	required

Returns:

Type	Description
`ValueOrRange`	The token generation latency in seconds.

Source code in ecologits/impacts/llm.py

@dag.asset
def generation_latency(
        model_active_parameter_count: float,
        output_token_count: float,
        batch_size: int,
        latency_alpha: float,
        latency_beta: float,
        latency_gamma: float,
        request_latency: float
) -> ValueOrRange:
    """
    Estimate how long the token generation took, in seconds.

    The per-token latency is modelled by the linear regression
    ``alpha * active_parameters + beta * batch_size + gamma``. The resulting
    estimate is capped by the request latency passed in.

    Args:
        model_active_parameter_count: Number of active parameters of the model (in billion).
        output_token_count: Number of generated tokens.
        batch_size: Number of requests handled concurrently by the server.
        latency_alpha: Alpha coefficient of the latency regression.
        latency_beta: Beta coefficient of the latency regression.
        latency_gamma: Gamma coefficient of the latency regression.
        request_latency: Request latency in seconds, used as an upper bound on the estimate.

    Returns:
        The token generation latency in seconds.
    """
    parameter_term = latency_alpha * model_active_parameter_count
    batch_term = latency_beta * batch_size
    seconds_per_token = parameter_term + batch_term + latency_gamma
    estimated_latency = output_token_count * seconds_per_token
    # The estimate is only kept when the request latency is not strictly smaller.
    return request_latency if request_latency < estimated_latency else estimated_latency

`model_required_memory(model_total_parameter_count, model_quantization_bits)`

Compute the required memory to load the model on GPU.

Parameters:

Name	Type	Description	Default
`model_total_parameter_count`	`float`	Number of parameters of the model (in billion).	required
`model_quantization_bits`	`int`	Number of bits used to represent the model weights.	required

Returns:

Type	Description
`float`	The amount of required GPU memory to load the model.

Source code in ecologits/impacts/llm.py

@dag.asset
def model_required_memory(
        model_total_parameter_count: float,
        model_quantization_bits: int,
) -> float:
    """
    Estimate the GPU memory needed to load the model weights.

    Args:
        model_total_parameter_count: Number of parameters of the model (in billion).
        model_quantization_bits: Number of bits used to represent the model weights.

    Returns:
        The amount of required GPU memory to load the model.
    """
    # NOTE(review): the 1.2 multiplier presumably adds a 20% margin on top of the
    # raw weight size, and the result is presumably in GB (billions of parameters
    # times bytes per parameter) -- confirm.
    margin_factor = 1.2
    bits_per_byte = 8
    return margin_factor * model_total_parameter_count * model_quantization_bits / bits_per_byte

`gpu_required_count(model_required_memory, gpu_memory)`

Compute the number of required GPUs to store the model.

Parameters:

Name	Type	Description	Default
`model_required_memory`	`float`	Required memory to load the model on GPU.	required
`gpu_memory`	`float`	Amount of memory available on a single GPU.	required

Returns:

Type	Description
`int`	The number of required GPUs to load the model.

Source code in ecologits/impacts/llm.py

@dag.asset
def gpu_required_count(
        model_required_memory: float,
        gpu_memory: float
) -> int:
    """
    Compute the number of required GPUs to store the model.

    The raw count ``ceil(model_required_memory / gpu_memory)`` is rounded up to
    the next power of two. At least one GPU is always returned, including when
    the required memory is zero.

    Args:
        model_required_memory: Required memory to load the model on GPU.
        gpu_memory: Amount of memory available on a single GPU.

    Returns:
        The number of required GPUs to load the model.

    Raises:
        ValueError: If the ratio of required memory to GPU memory is negative.
        ZeroDivisionError: If ``gpu_memory`` is zero.
    """
    memory_ratio = model_required_memory / gpu_memory
    if memory_ratio < 0:
        raise ValueError(
            "model_required_memory / gpu_memory must be non-negative, "
            f"got {memory_ratio!r}"
        )
    # Clamp to one GPU so a zero memory requirement does not reach a logarithm
    # of zero (the previous implementation raised "math domain error" there).
    gpu_nb = max(1, math.ceil(memory_ratio))
    # Round up to the next power of two with exact integer arithmetic:
    # (n - 1).bit_length() is the smallest k such that 2 ** k >= n for n >= 1.
    return 1 << (gpu_nb - 1).bit_length()

`server_energy(generation_latency, server_power, server_gpu_count, gpu_required_count, batch_size)`

Compute the energy consumption of the server.

Parameters:

Name	Type	Description	Default
`generation_latency`	`float`	Token generation latency in seconds.	required
`server_power`	`float`	Power consumption of the server in kW.	required
`server_gpu_count`	`int`	Number of available GPUs in the server.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required
`batch_size`	`int`	Number of requests handled concurrently by the server.	required

Returns:

Type	Description
`float`	The energy consumption of the server (GPUs are not included) in kWh.

Source code in ecologits/impacts/llm.py

@dag.asset
def server_energy(
        generation_latency: float,
        server_power: float,
        server_gpu_count: int,
        gpu_required_count: int,
        batch_size: int
) -> float:
    """
    Estimate the share of server energy attributable to one request.

    The server power is applied over the generation time, scaled by the fraction
    of the server's GPUs used by the model, and divided across the requests of
    the batch.

    Args:
        generation_latency: Token generation latency in seconds.
        server_power: Power consumption of the server in kW.
        server_gpu_count: Number of available GPUs in the server.
        gpu_required_count: Number of required GPUs to load the model.
        batch_size: Number of requests handled concurrently by the server.

    Returns:
        The energy consumption of the server (GPUs are not included) in kWh.
    """
    hours = generation_latency / 3600   # seconds -> hours
    gpu_fraction = gpu_required_count / server_gpu_count
    per_request_share = 1 / batch_size
    return hours * server_power * gpu_fraction * per_request_share

`request_energy(datacenter_pue, server_energy, gpu_required_count, gpu_energy)`

Compute the energy consumption of the request.

Parameters:

Name	Type	Description	Default
`datacenter_pue`	`float`	Power Usage Effectiveness of the data center.	required
`server_energy`	`float`	Energy consumption of the server in kWh.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required
`gpu_energy`	`ValueOrRange`	Energy consumption of a single GPU in kWh.	required

Returns:

Type	Description
`ValueOrRange`	The energy consumption of the request in kWh.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_energy(
        datacenter_pue: float,
        server_energy: float,
        gpu_required_count: int,
        gpu_energy: ValueOrRange
) -> ValueOrRange:
    """
    Compute the total energy consumption attributed to the request.

    The server energy and the energy of all required GPUs are summed, then
    multiplied by the data center PUE.

    Args:
        datacenter_pue: Power Usage Effectiveness of the data center.
        server_energy: Energy consumption of the server in kWh.
        gpu_required_count: Number of required GPUs to load the model.
        gpu_energy: Energy consumption of a single GPU in kWh.

    Returns:
        The energy consumption of the request in kWh.
    """
    all_gpus_energy = gpu_required_count * gpu_energy
    hardware_energy = server_energy + all_gpus_energy
    return datacenter_pue * hardware_energy

`request_usage_gwp(request_energy, if_electricity_mix_gwp)`

Compute the Global Warming Potential (GWP) usage impact of the request.

Parameters:

Name	Type	Description	Default
`request_energy`	`ValueOrRange`	Energy consumption of the request in kWh.	required
`if_electricity_mix_gwp`	`float`	GWP impact factor of electricity consumption in kgCO2eq / kWh.	required

Returns:

Type	Description
`ValueOrRange`	The GWP usage impact of the request in kgCO2eq.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_usage_gwp(
        request_energy: ValueOrRange,
        if_electricity_mix_gwp: float
) -> ValueOrRange:
    """
    Convert the request energy into its usage-phase Global Warming Potential (GWP).

    Args:
        request_energy: Energy consumption of the request in kWh.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption in kgCO2eq / kWh.

    Returns:
        The GWP usage impact of the request in kgCO2eq.
    """
    usage_gwp = request_energy * if_electricity_mix_gwp
    return usage_gwp

`request_usage_adpe(request_energy, if_electricity_mix_adpe)`

Compute the Abiotic Depletion Potential for Elements (ADPe) usage impact of the request.

Parameters:

Name	Type	Description	Default
`request_energy`	`ValueOrRange`	Energy consumption of the request in kWh.	required
`if_electricity_mix_adpe`	`float`	ADPe impact factor of electricity consumption in kgSbeq / kWh.	required

Returns:

Type	Description
`ValueOrRange`	The ADPe usage impact of the request in kgSbeq.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_usage_adpe(
        request_energy: ValueOrRange,
        if_electricity_mix_adpe: float
) -> ValueOrRange:
    """
    Convert the request energy into its usage-phase Abiotic Depletion Potential for Elements (ADPe).

    Args:
        request_energy: Energy consumption of the request in kWh.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption in kgSbeq / kWh.

    Returns:
        The ADPe usage impact of the request in kgSbeq.
    """
    usage_adpe = request_energy * if_electricity_mix_adpe
    return usage_adpe

`request_usage_pe(request_energy, if_electricity_mix_pe)`

Compute the Primary Energy (PE) usage impact of the request.

Parameters:

Name	Type	Description	Default
`request_energy`	`ValueOrRange`	Energy consumption of the request in kWh.	required
`if_electricity_mix_pe`	`float`	PE impact factor of electricity consumption in MJ / kWh.	required

Returns:

Type	Description
`ValueOrRange`	The PE usage impact of the request in MJ.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_usage_pe(
        request_energy: ValueOrRange,
        if_electricity_mix_pe: float
) -> ValueOrRange:
    """
    Convert the request energy into its usage-phase Primary Energy (PE) impact.

    Args:
        request_energy: Energy consumption of the request in kWh.
        if_electricity_mix_pe: PE impact factor of electricity consumption in MJ / kWh.

    Returns:
        The PE usage impact of the request in MJ.
    """
    usage_pe = request_energy * if_electricity_mix_pe
    return usage_pe

`request_usage_wcf(request_energy, if_electricity_mix_wue, datacenter_wue, datacenter_pue)`

Compute the water usage impact of the request.

Parameters:

Name	Type	Description	Default
`request_energy`	`ValueOrRange`	Energy consumption of the request in kWh.	required
`if_electricity_mix_wue`	`float`	WCF impact factor of electricity consumption in L / kWh.	required
`datacenter_wue`	`float`	Water Usage Effectiveness of the data center in L/kWh.	required
`datacenter_pue`	`float`	Power Usage Effectiveness of the data center.	required

Returns:

Type	Description
`ValueOrRange`	The water usage impact of the request in liters.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_usage_wcf(
        request_energy: ValueOrRange,
        if_electricity_mix_wue: float,
        datacenter_wue: float,
        datacenter_pue: float
) -> ValueOrRange:
    """
    Compute the water usage impact of the request.

    The water intensity per kWh is the data center WUE plus the electricity-mix
    water factor multiplied by the data center PUE.

    Args:
        request_energy: Energy consumption of the request in kWh.
        if_electricity_mix_wue: WCF impact factor of electricity consumption in L / kWh.
        datacenter_wue: Water Usage Effectiveness of the data center in L/kWh.
        datacenter_pue: Power Usage Effectiveness of the data center.

    Returns:
        The water usage impact of the request in liters.
    """
    electricity_water = datacenter_pue * if_electricity_mix_wue
    liters_per_kwh = datacenter_wue + electricity_water
    return request_energy * liters_per_kwh

`server_gpu_embodied_gwp(server_embodied_gwp, server_gpu_count, gpu_embodied_gwp, gpu_required_count)`

Compute the Global Warming Potential (GWP) embodied impact of the server and its GPUs.

Parameters:

Name	Type	Description	Default
`server_embodied_gwp`	`float`	GWP embodied impact of the server in kgCO2eq.	required
`server_gpu_count`	`float`	Number of available GPUs in the server.	required
`gpu_embodied_gwp`	`float`	GWP embodied impact of a single GPU in kgCO2eq.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required

Returns:

Type	Description
`float`	The GWP embodied impact of the server and the GPUs in kgCO2eq.

Source code in ecologits/impacts/llm.py

@dag.asset
def server_gpu_embodied_gwp(
        server_embodied_gwp: float,
        server_gpu_count: float,
        gpu_embodied_gwp: float,
        gpu_required_count: int
) -> float:
    """
    Compute the Global Warming Potential (GWP) embodied impact of the server and its GPUs.

    The server impact is prorated by the fraction of its GPUs that the model
    requires; the impact of each required GPU is added in full.

    Args:
        server_embodied_gwp: GWP embodied impact of the server in kgCO2eq.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_gwp: GWP embodied impact of a single GPU in kgCO2eq.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The GWP embodied impact of the server and the GPUs in kgCO2eq.
    """
    server_fraction = gpu_required_count / server_gpu_count
    server_part = server_fraction * server_embodied_gwp
    gpus_part = gpu_required_count * gpu_embodied_gwp
    return server_part + gpus_part

`server_gpu_embodied_adpe(server_embodied_adpe, server_gpu_count, gpu_embodied_adpe, gpu_required_count)`

Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the server and its GPUs.

Parameters:

Name	Type	Description	Default
`server_embodied_adpe`	`float`	ADPe embodied impact of the server in kgSbeq.	required
`server_gpu_count`	`float`	Number of available GPUs in the server.	required
`gpu_embodied_adpe`	`float`	ADPe embodied impact of a single GPU in kgSbeq.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required

Returns:

Type	Description
`float`	The ADPe embodied impact of the server and the GPUs in kgSbeq.

Source code in ecologits/impacts/llm.py

@dag.asset
def server_gpu_embodied_adpe(
        server_embodied_adpe: float,
        server_gpu_count: float,
        gpu_embodied_adpe: float,
        gpu_required_count: int
) -> float:
    """
    Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the server and its GPUs.

    The server impact is prorated by the fraction of its GPUs that the model
    requires; the impact of each required GPU is added in full.

    Args:
        server_embodied_adpe: ADPe embodied impact of the server in kgSbeq.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_adpe: ADPe embodied impact of a single GPU in kgSbeq.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The ADPe embodied impact of the server and the GPUs in kgSbeq.
    """
    server_fraction = gpu_required_count / server_gpu_count
    server_part = server_fraction * server_embodied_adpe
    gpus_part = gpu_required_count * gpu_embodied_adpe
    return server_part + gpus_part

`server_gpu_embodied_pe(server_embodied_pe, server_gpu_count, gpu_embodied_pe, gpu_required_count)`

Compute the Primary Energy (PE) embodied impact of the server and its GPUs.

Parameters:

Name	Type	Description	Default
`server_embodied_pe`	`float`	PE embodied impact of the server in MJ.	required
`server_gpu_count`	`float`	Number of available GPUs in the server.	required
`gpu_embodied_pe`	`float`	PE embodied impact of a single GPU in MJ.	required
`gpu_required_count`	`int`	Number of required GPUs to load the model.	required

Returns:

Type	Description
`float`	The PE embodied impact of the server and the GPUs in MJ.

Source code in ecologits/impacts/llm.py

@dag.asset
def server_gpu_embodied_pe(
        server_embodied_pe: float,
        server_gpu_count: float,
        gpu_embodied_pe: float,
        gpu_required_count: int
) -> float:
    """
    Compute the Primary Energy (PE) embodied impact of the server and its GPUs.

    The server impact is prorated by the fraction of its GPUs that the model
    requires; the impact of each required GPU is added in full.

    Args:
        server_embodied_pe: PE embodied impact of the server in MJ.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_pe: PE embodied impact of a single GPU in MJ.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The PE embodied impact of the server and the GPUs in MJ.
    """
    server_fraction = gpu_required_count / server_gpu_count
    server_part = server_fraction * server_embodied_pe
    gpus_part = gpu_required_count * gpu_embodied_pe
    return server_part + gpus_part

`request_embodied_gwp(server_gpu_embodied_gwp, server_lifetime, generation_latency, batch_size)`

Compute the Global Warming Potential (GWP) embodied impact of the request.

Parameters:

Name	Type	Description	Default
`server_gpu_embodied_gwp`	`float`	GWP embodied impact of the server and the GPUs in kgCO2eq.	required
`server_lifetime`	`float`	Lifetime duration of the server in seconds.	required
`generation_latency`	`ValueOrRange`	Token generation latency in seconds.	required
`batch_size`	`int`	Number of requests handled concurrently by the server.	required

Returns:

Type	Description
`ValueOrRange`	The GWP embodied impact of the request in kgCO2eq.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_embodied_gwp(
        server_gpu_embodied_gwp: float,
        server_lifetime: float,
        generation_latency: ValueOrRange,
        batch_size: int
) -> ValueOrRange:
    """
    Compute the Global Warming Potential (GWP) embodied impact of the request.

    The hardware's embodied impact is allocated to the request in proportion to
    the generation time over the server lifetime, shared across the batch.

    Args:
        server_gpu_embodied_gwp: GWP embodied impact of the server and the GPUs in kgCO2eq.
        server_lifetime: Lifetime duration of the server in seconds.
        generation_latency: Token generation latency in seconds.
        batch_size: Number of requests handled concurrently by the server.

    Returns:
        The GWP embodied impact of the request in kgCO2eq.
    """
    # Lifetime multiplied by the number of concurrent requests.
    lifetime_times_batch = server_lifetime * batch_size
    return generation_latency * server_gpu_embodied_gwp / lifetime_times_batch

`request_embodied_adpe(server_gpu_embodied_adpe, server_lifetime, generation_latency, batch_size)`

Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the request.

Parameters:

Name	Type	Description	Default
`server_gpu_embodied_adpe`	`float`	ADPe embodied impact of the server and the GPUs in kgSbeq.	required
`server_lifetime`	`float`	Lifetime duration of the server in seconds.	required
`generation_latency`	`ValueOrRange`	Token generation latency in seconds.	required
`batch_size`	`int`	Number of requests handled concurrently by the server.	required

Returns:

Type	Description
`ValueOrRange`	The ADPe embodied impact of the request in kgSbeq.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_embodied_adpe(
        server_gpu_embodied_adpe: float,
        server_lifetime: float,
        generation_latency: ValueOrRange,
        batch_size: int
) -> ValueOrRange:
    """
    Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the request.

    The hardware's embodied impact is allocated to the request in proportion to
    the generation time over the server lifetime, shared across the batch.

    Args:
        server_gpu_embodied_adpe: ADPe embodied impact of the server and the GPUs in kgSbeq.
        server_lifetime: Lifetime duration of the server in seconds.
        generation_latency: Token generation latency in seconds.
        batch_size: Number of requests handled concurrently by the server.

    Returns:
        The ADPe embodied impact of the request in kgSbeq.
    """
    # Lifetime multiplied by the number of concurrent requests.
    lifetime_times_batch = server_lifetime * batch_size
    return generation_latency * server_gpu_embodied_adpe / lifetime_times_batch

`request_embodied_pe(server_gpu_embodied_pe, server_lifetime, generation_latency, batch_size)`

Compute the Primary Energy (PE) embodied impact of the request.

Parameters:

Name	Type	Description	Default
`server_gpu_embodied_pe`	`float`	PE embodied impact of the server and the GPUs in MJ.	required
`server_lifetime`	`float`	Lifetime duration of the server in seconds.	required
`generation_latency`	`ValueOrRange`	Token generation latency in seconds.	required
`batch_size`	`int`	Number of requests handled concurrently by the server.	required

Returns:

Type	Description
`ValueOrRange`	The PE embodied impact of the request in MJ.

Source code in ecologits/impacts/llm.py

@dag.asset
def request_embodied_pe(
        server_gpu_embodied_pe: float,
        server_lifetime: float,
        generation_latency: ValueOrRange,
        batch_size: int
) -> ValueOrRange:
    """
    Compute the Primary Energy (PE) embodied impact of the request.

    The hardware's embodied impact is allocated to the request in proportion to
    the generation time over the server lifetime, shared across the batch.

    Args:
        server_gpu_embodied_pe: PE embodied impact of the server and the GPUs in MJ.
        server_lifetime: Lifetime duration of the server in seconds.
        generation_latency: Token generation latency in seconds.
        batch_size: Number of requests handled concurrently by the server.

    Returns:
        The PE embodied impact of the request in MJ.
    """
    # Lifetime multiplied by the number of concurrent requests.
    lifetime_times_batch = server_lifetime * batch_size
    return generation_latency * server_gpu_embodied_pe / lifetime_times_batch

`compute_llm_impacts_dag(model_active_parameter_count, model_total_parameter_count, output_token_count, request_latency, if_electricity_mix_adpe, if_electricity_mix_pe, if_electricity_mix_gwp, if_electricity_mix_wue, datacenter_pue, datacenter_wue, model_quantization_bits=MODEL_QUANTIZATION_BITS, gpu_energy_alpha=GPU_ENERGY_ALPHA, gpu_energy_beta=GPU_ENERGY_BETA, gpu_energy_gamma=GPU_ENERGY_GAMMA, latency_alpha=LATENCY_ALPHA, latency_beta=LATENCY_BETA, latency_gamma=LATENCY_GAMMA, gpu_memory=GPU_MEMORY, gpu_embodied_gwp=GPU_EMBODIED_IMPACT_GWP, gpu_embodied_adpe=GPU_EMBODIED_IMPACT_ADPE, gpu_embodied_pe=GPU_EMBODIED_IMPACT_PE, server_gpu_count=SERVER_GPUS, server_power=SERVER_POWER, server_embodied_gwp=SERVER_EMBODIED_IMPACT_GWP, server_embodied_adpe=SERVER_EMBODIED_IMPACT_ADPE, server_embodied_pe=SERVER_EMBODIED_IMPACT_PE, server_lifetime=HARDWARE_LIFESPAN, batch_size=BATCH_SIZE)`

Compute the impacts dag of an LLM generation request.

Parameters:

Name	Type	Description	Default
`model_active_parameter_count`	`ValueOrRange`	Number of active parameters of the model (in billion).	required
`model_total_parameter_count`	`ValueOrRange`	Number of parameters of the model (in billion).	required
`output_token_count`	`float`	Number of generated tokens.	required
`request_latency`	`float`	Measured request latency in seconds.	required
`if_electricity_mix_adpe`	`float`	ADPe impact factor of electricity consumption in kgSbeq / kWh (Antimony).	required
`if_electricity_mix_pe`	`float`	PE impact factor of electricity consumption in MJ / kWh.	required
`if_electricity_mix_gwp`	`float`	GWP impact factor of electricity consumption in kgCO2eq / kWh.	required
`if_electricity_mix_wue`	`float`	WCF impact factor of electricity consumption in L / kWh.	required
`datacenter_wue`	`ValueOrRange`	Water Usage Effectiveness of the data center in L/kWh.	required
`datacenter_pue`	`ValueOrRange`	Power Usage Effectiveness of the data center.	required
`model_quantization_bits`	`Optional[int]`	Number of bits used to represent the model weights.	`MODEL_QUANTIZATION_BITS`
`gpu_energy_alpha`	`Optional[float]`	Alpha coefficient of the "GPU energy" regression.	`GPU_ENERGY_ALPHA`
`gpu_energy_beta`	`Optional[float]`	Beta coefficient of the "GPU energy" regression.	`GPU_ENERGY_BETA`
`gpu_energy_gamma`	`Optional[float]`	Gamma coefficient of the "GPU energy" regression.	`GPU_ENERGY_GAMMA`
`latency_alpha`	`Optional[float]`	Alpha coefficient of the "Latency" regression.	`LATENCY_ALPHA`
`latency_beta`	`Optional[float]`	Beta coefficient of the "Latency" regression.	`LATENCY_BETA`
`latency_gamma`	`Optional[float]`	Gamma coefficient of the "Latency" regression.	`LATENCY_GAMMA`
`gpu_memory`	`Optional[float]`	Amount of memory available on a single GPU.	`GPU_MEMORY`
`gpu_embodied_gwp`	`Optional[float]`	GWP embodied impact of a single GPU.	`GPU_EMBODIED_IMPACT_GWP`
`gpu_embodied_adpe`	`Optional[float]`	ADPe embodied impact of a single GPU.	`GPU_EMBODIED_IMPACT_ADPE`
`gpu_embodied_pe`	`Optional[float]`	PE embodied impact of a single GPU.	`GPU_EMBODIED_IMPACT_PE`
`server_gpu_count`	`Optional[int]`	Number of available GPUs in the server.	`SERVER_GPUS`
`server_power`	`Optional[float]`	Power consumption of the server in kW.	`SERVER_POWER`
`server_embodied_gwp`	`Optional[float]`	GWP embodied impact of the server in kgCO2eq.	`SERVER_EMBODIED_IMPACT_GWP`
`server_embodied_adpe`	`Optional[float]`	ADPe embodied impact of the server in kgSbeq.	`SERVER_EMBODIED_IMPACT_ADPE`
`server_embodied_pe`	`Optional[float]`	PE embodied impact of the server in MJ.	`SERVER_EMBODIED_IMPACT_PE`
`server_lifetime`	`Optional[float]`	Lifetime duration of the server in seconds.	`HARDWARE_LIFESPAN`
`batch_size`	`Optional[float]`	The number of requests handled concurrently by the server.	`BATCH_SIZE`

Returns:

Type	Description
`dict[str, ValueOrRange]`	The environmental impacts dag with all intermediate states.

Source code in ecologits/impacts/llm.py

def compute_llm_impacts_dag(
        model_active_parameter_count: ValueOrRange,
        model_total_parameter_count: ValueOrRange,
        output_token_count: float,
        request_latency: float,
        if_electricity_mix_adpe: float,
        if_electricity_mix_pe: float,
        if_electricity_mix_gwp: float,
        if_electricity_mix_wue: float,
        datacenter_pue: ValueOrRange,
        datacenter_wue: ValueOrRange,
        model_quantization_bits: Optional[int] = MODEL_QUANTIZATION_BITS,
        gpu_energy_alpha: Optional[float] = GPU_ENERGY_ALPHA,
        gpu_energy_beta: Optional[float] = GPU_ENERGY_BETA,
        gpu_energy_gamma: Optional[float] = GPU_ENERGY_GAMMA,
        latency_alpha: Optional[float] = LATENCY_ALPHA,
        latency_beta: Optional[float] = LATENCY_BETA,
        latency_gamma: Optional[float] = LATENCY_GAMMA,
        gpu_memory: Optional[float] = GPU_MEMORY,
        gpu_embodied_gwp: Optional[float] = GPU_EMBODIED_IMPACT_GWP,
        gpu_embodied_adpe: Optional[float] = GPU_EMBODIED_IMPACT_ADPE,
        gpu_embodied_pe: Optional[float] = GPU_EMBODIED_IMPACT_PE,
        server_gpu_count: Optional[int] = SERVER_GPUS,
        server_power: Optional[float] = SERVER_POWER,
        server_embodied_gwp: Optional[float] = SERVER_EMBODIED_IMPACT_GWP,
        server_embodied_adpe: Optional[float] = SERVER_EMBODIED_IMPACT_ADPE,
        server_embodied_pe: Optional[float] = SERVER_EMBODIED_IMPACT_PE,
        server_lifetime: Optional[float] = HARDWARE_LIFESPAN,
        batch_size: Optional[float] =  BATCH_SIZE
) -> dict[str, ValueOrRange]:
    """
    Run the impacts DAG for one LLM generation request.

    Every argument is forwarded by name to ``dag.execute`` and its result is
    returned unchanged.

    Args:
        model_active_parameter_count: Number of active parameters of the model (in billion).
        model_total_parameter_count: Number of parameters of the model (in billion).
        output_token_count: Number of generated tokens.
        request_latency: Measured request latency in seconds.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption in kgSbeq / kWh (Antimony).
        if_electricity_mix_pe: PE impact factor of electricity consumption in MJ / kWh.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption in kgCO2eq / kWh.
        if_electricity_mix_wue: WCF impact factor of electricity consumption in L / kWh.
        datacenter_pue: Power Usage Effectiveness of the data center.
        datacenter_wue: Water Usage Effectiveness of the data center in L/kWh.
        model_quantization_bits: Number of bits used to represent the model weights.
        gpu_energy_alpha: Alpha coefficient of the "GPU energy" regression.
        gpu_energy_beta: Beta coefficient of the "GPU energy" regression.
        gpu_energy_gamma: Gamma coefficient of the "GPU energy" regression.
        latency_alpha: Alpha coefficient of the "Latency" regression.
        latency_beta: Beta coefficient of the "Latency" regression.
        latency_gamma: Gamma coefficient of the "Latency" regression.
        gpu_memory: Amount of memory available on a single GPU.
        gpu_embodied_gwp: GWP embodied impact of a single GPU.
        gpu_embodied_adpe: ADPe embodied impact of a single GPU.
        gpu_embodied_pe: PE embodied impact of a single GPU.
        server_gpu_count: Number of available GPUs in the server.
        server_power: Power consumption of the server in kW.
        server_embodied_gwp: GWP embodied impact of the server in kgCO2eq.
        server_embodied_adpe: ADPe embodied impact of the server in kgSbeq.
        server_embodied_pe: PE embodied impact of the server in MJ.
        server_lifetime: Lifetime duration of the server in seconds.
        batch_size: The number of requests handled concurrently by the server.

    Returns:
        The environmental impacts dag with all intermediate states.
    """
    # Keyword order mirrors the order in which the inputs are handed to the DAG.
    dag_inputs = {
        "model_active_parameter_count": model_active_parameter_count,
        "model_total_parameter_count": model_total_parameter_count,
        "model_quantization_bits": model_quantization_bits,
        "output_token_count": output_token_count,
        "request_latency": request_latency,
        "if_electricity_mix_gwp": if_electricity_mix_gwp,
        "if_electricity_mix_adpe": if_electricity_mix_adpe,
        "if_electricity_mix_pe": if_electricity_mix_pe,
        "if_electricity_mix_wue": if_electricity_mix_wue,
        "datacenter_wue": datacenter_wue,
        "datacenter_pue": datacenter_pue,
        "gpu_energy_alpha": gpu_energy_alpha,
        "gpu_energy_beta": gpu_energy_beta,
        "gpu_energy_gamma": gpu_energy_gamma,
        "latency_alpha": latency_alpha,
        "latency_beta": latency_beta,
        "latency_gamma": latency_gamma,
        "gpu_memory": gpu_memory,
        "gpu_embodied_gwp": gpu_embodied_gwp,
        "gpu_embodied_adpe": gpu_embodied_adpe,
        "gpu_embodied_pe": gpu_embodied_pe,
        "server_gpu_count": server_gpu_count,
        "server_power": server_power,
        "server_embodied_gwp": server_embodied_gwp,
        "server_embodied_adpe": server_embodied_adpe,
        "server_embodied_pe": server_embodied_pe,
        "server_lifetime": server_lifetime,
        "batch_size": batch_size,
    }
    return dag.execute(**dag_inputs)

`compute_llm_impacts(model_active_parameter_count, model_total_parameter_count, output_token_count, if_electricity_mix_adpe, if_electricity_mix_pe, if_electricity_mix_gwp, if_electricity_mix_wue, datacenter_pue, datacenter_wue, request_latency=None, **kwargs)`

Compute the impacts of an LLM generation request.

Parameters:

Name	Type	Description	Default
`model_active_parameter_count`	`ValueOrRange`	Number of active parameters of the model (in billion).	required
`model_total_parameter_count`	`ValueOrRange`	Number of total parameters of the model (in billion).	required
`output_token_count`	`float`	Number of generated tokens.	required
`if_electricity_mix_adpe`	`float`	ADPe impact factor of electricity consumption in kgSbeq / kWh (Antimony).	required
`if_electricity_mix_pe`	`float`	PE impact factor of electricity consumption in MJ / kWh.	required
`if_electricity_mix_gwp`	`float`	GWP impact factor of electricity consumption in kgCO2eq / kWh.	required
`if_electricity_mix_wue`	`float`	WCF impact factor of electricity consumption in L / kWh.	required
`datacenter_wue`	`ValueOrRange`	Water Usage Effectiveness of the data center in L/kWh.	required
`datacenter_pue`	`ValueOrRange`	Power Usage Effectiveness of the data center.	required
`request_latency`	`Optional[float]`	Measured request latency in seconds.	`None`
`**kwargs`	`Any`	Any other optional parameter.	`{}`

Returns:

Type	Description
`Impacts`	The impacts of an LLM generation request.

Source code in ecologits/impacts/llm.py

def compute_llm_impacts(
        model_active_parameter_count: ValueOrRange,
        model_total_parameter_count: ValueOrRange,
        output_token_count: float,
        if_electricity_mix_adpe: float,
        if_electricity_mix_pe: float,
        if_electricity_mix_gwp: float,
        if_electricity_mix_wue: float,
        datacenter_pue: ValueOrRange,
        datacenter_wue: ValueOrRange,
        request_latency: Optional[float] = None,
        **kwargs: Any
) -> Impacts:
    """
    Compute the impacts of an LLM generation request.

    When either parameter count is a `RangeValue`, the impact DAG is evaluated twice (once with the lower
    bounds, once with the upper bounds) and each resulting field is merged into a `RangeValue`. Otherwise
    the DAG is evaluated once and its outputs are used as-is.

    Args:
        model_active_parameter_count: Number of active parameters of the model (in billion).
        model_total_parameter_count: Number of total parameters of the model (in billion).
        output_token_count: Number of generated tokens.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption in kgSbeq / kWh (Antimony).
        if_electricity_mix_pe: PE impact factor of electricity consumption in MJ / kWh.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption in kgCO2eq / kWh.
        if_electricity_mix_wue: WCF impact factor of electricity consumption in L / kWh.
        datacenter_pue: Power Usage Effectiveness of the data center.
        datacenter_wue: Water Usage Effectiveness of the data center in L/kWh.
        request_latency: Measured request latency in seconds. `None` is replaced by `math.inf`.
        **kwargs: Any other optional parameter, forwarded unchanged to `compute_llm_impacts_dag`.

    Returns:
        The impacts of an LLM generation request.
    """
    latency = math.inf if request_latency is None else request_latency

    # Build the lists of parameter counts to evaluate: a single pair for plain values,
    # or a (min, max) pair of evaluations as soon as one of the two counts is a range.
    active_is_range = isinstance(model_active_parameter_count, RangeValue)
    total_is_range = isinstance(model_total_parameter_count, RangeValue)
    if active_is_range or total_is_range:
        if active_is_range:
            active_bounds = [model_active_parameter_count.min, model_active_parameter_count.max]
        else:
            # A plain value is duplicated so it lines up with the other count's two bounds.
            active_bounds = [model_active_parameter_count] * 2
        if total_is_range:
            total_bounds = [model_total_parameter_count.min, model_total_parameter_count.max]
        else:
            total_bounds = [model_total_parameter_count] * 2
    else:
        active_bounds = [model_active_parameter_count]
        total_bounds = [model_total_parameter_count]

    impact_fields = (
        "request_energy",
        "request_usage_gwp",
        "request_usage_adpe",
        "request_usage_pe",
        "request_usage_wcf",
        "request_embodied_gwp",
        "request_embodied_adpe",
        "request_embodied_pe",
    )
    merged: dict[str, Union[RangeValue, float, int]] = {}
    for active_bound, total_bound in zip(active_bounds, total_bounds):
        dag_output = compute_llm_impacts_dag(
            model_active_parameter_count=active_bound,
            model_total_parameter_count=total_bound,
            output_token_count=output_token_count,
            request_latency=latency,
            if_electricity_mix_adpe=if_electricity_mix_adpe,
            if_electricity_mix_pe=if_electricity_mix_pe,
            if_electricity_mix_gwp=if_electricity_mix_gwp,
            if_electricity_mix_wue=if_electricity_mix_wue,
            datacenter_pue=datacenter_pue,
            datacenter_wue=datacenter_wue,
            **kwargs
        )
        for name in impact_fields:
            current = dag_output[name]
            if name not in merged:
                # First evaluation: keep the value untouched.
                merged[name] = current
                continue

            # Second evaluation: the stored value provides the lower bound and the
            # fresh value provides the upper bound of the merged range.
            lower = merged[name]
            upper = current
            if isinstance(lower, RangeValue):
                lower = cast(Union[float, int], lower.min)
            if isinstance(upper, RangeValue):
                upper = cast(Union[float, int], upper.max)
            merged[name] = RangeValue(min=lower, max=upper)

    # Wrap raw values into their impact types (usage phase first, then embodied).
    request_energy = Energy(value=merged["request_energy"])
    usage_gwp = GWP(value=merged["request_usage_gwp"])
    usage_adpe = ADPe(value=merged["request_usage_adpe"])
    usage_pe = PE(value=merged["request_usage_pe"])
    usage_wcf = WCF(value=merged["request_usage_wcf"])
    embodied_gwp = GWP(value=merged["request_embodied_gwp"])
    embodied_adpe = ADPe(value=merged["request_embodied_adpe"])
    embodied_pe = PE(value=merged["request_embodied_pe"])

    # Totals combine usage and embodied phases; water (WCF) only has a usage component here.
    total_gwp = usage_gwp + embodied_gwp
    total_adpe = usage_adpe + embodied_adpe
    total_pe = usage_pe + embodied_pe

    usage = Usage(
        energy=request_energy,
        gwp=usage_gwp,
        adpe=usage_adpe,
        pe=usage_pe,
        wcf=usage_wcf
    )
    embodied = Embodied(
        gwp=embodied_gwp,
        adpe=embodied_adpe,
        pe=embodied_pe
    )
    return Impacts(
        energy=request_energy,
        gwp=total_gwp,
        adpe=total_adpe,
        pe=total_pe,
        wcf=usage_wcf,
        usage=usage,
        embodied=embodied
    )