Skip to content

llm

gpu_energy(model_active_parameter_count, output_token_count, gpu_energy_alpha, gpu_energy_beta)

Compute energy consumption of a single GPU.

Parameters:

Name Type Description Default
model_active_parameter_count float

Number of active parameters of the model.

required
output_token_count float

Number of generated tokens.

required
gpu_energy_alpha float

Alpha parameter of the GPU linear power consumption profile.

required
gpu_energy_beta float

Beta parameter of the GPU linear power consumption profile.

required

Returns:

Type Description
float

The energy consumption of a single GPU in kWh.

Source code in ecologits/impacts/llm.py
@dag.asset
def gpu_energy(
    model_active_parameter_count: float,
    output_token_count: float,
    gpu_energy_alpha: float,
    gpu_energy_beta: float,
) -> float:
    """
    Estimate the energy drawn by one GPU to generate the output tokens.

    The energy per token follows a linear profile in the number of active
    parameters (``alpha * parameters + beta``); it is then scaled by the
    number of generated tokens.

    Args:
        model_active_parameter_count: Number of active parameters of the model.
        output_token_count: Number of generated tokens.
        gpu_energy_alpha: Slope (alpha) of the GPU linear power consumption profile.
        gpu_energy_beta: Intercept (beta) of the GPU linear power consumption profile.

    Returns:
        The energy consumption of a single GPU in kWh.
    """
    # Linear profile evaluated for this model size: energy for one token.
    energy_per_token = gpu_energy_alpha * model_active_parameter_count + gpu_energy_beta
    return output_token_count * energy_per_token

generation_latency(model_active_parameter_count, output_token_count, gpu_latency_alpha, gpu_latency_beta, request_latency)

Compute the token generation latency in seconds.

Parameters:

Name Type Description Default
model_active_parameter_count float

Number of active parameters of the model.

required
output_token_count float

Number of generated tokens.

required
gpu_latency_alpha float

Alpha parameter of the GPU linear latency profile.

required
gpu_latency_beta float

Beta parameter of the GPU linear latency profile.

required
request_latency float

Measured request latency (upper bound) in seconds.

required

Returns:

Type Description
float

The token generation latency in seconds.

Source code in ecologits/impacts/llm.py
@dag.asset
def generation_latency(
    model_active_parameter_count: float,
    output_token_count: float,
    gpu_latency_alpha: float,
    gpu_latency_beta: float,
    request_latency: float,
) -> float:
    """
    Estimate how long the token generation takes, in seconds.

    The latency per token follows a linear profile in the number of active
    parameters (``alpha * parameters + beta``) and is scaled by the number of
    generated tokens. The result never exceeds the measured request latency.

    Args:
        model_active_parameter_count: Number of active parameters of the model.
        output_token_count: Number of generated tokens.
        gpu_latency_alpha: Slope (alpha) of the GPU linear latency profile.
        gpu_latency_beta: Intercept (beta) of the GPU linear latency profile.
        request_latency: Measured request latency (upper bound) in seconds.

    Returns:
        The token generation latency in seconds.
    """
    latency_per_token = gpu_latency_alpha * model_active_parameter_count + gpu_latency_beta
    modelled_latency = output_token_count * latency_per_token
    # The measured request latency caps the modelled value.
    return min(modelled_latency, request_latency)

model_required_memory(model_total_parameter_count, model_quantization_bits)

Compute the required memory to load the model on GPU.

Parameters:

Name Type Description Default
model_total_parameter_count float

Number of parameters of the model.

required
model_quantization_bits int

Number of bits used to represent the model weights.

required

Returns:

Type Description
float

The amount of required GPU memory to load the model.

Source code in ecologits/impacts/llm.py
@dag.asset
def model_required_memory(
    model_total_parameter_count: float,
    model_quantization_bits: int,
) -> float:
    """
    Estimate the GPU memory needed to load the model.

    Args:
        model_total_parameter_count: Number of parameters of the model.
        model_quantization_bits: Number of bits used to represent the model weights.

    Returns:
        The amount of required GPU memory to load the model.
    """
    # NOTE(review): the 1.2 multiplier looks like a 20% memory overhead on top
    # of the raw weights - confirm against the methodology.
    padded_parameter_count = 1.2 * model_total_parameter_count
    # Dividing by 8 converts the per-weight size from bits to bytes.
    return padded_parameter_count * model_quantization_bits / 8

gpu_required_count(model_required_memory, gpu_memory)

Compute the number of GPUs required to store the model.

Parameters:

Name Type Description Default
model_required_memory float

Required memory to load the model on GPU.

required
gpu_memory float

Amount of memory available on a single GPU.

required

Returns:

Type Description
int

The number of required GPUs to load the model.

Source code in ecologits/impacts/llm.py
@dag.asset
def gpu_required_count(
    model_required_memory: float,
    gpu_memory: float,
) -> int:
    """
    Determine how many GPUs are needed to hold the model in memory.

    Args:
        model_required_memory: Required memory to load the model on GPU.
        gpu_memory: Amount of memory available on a single GPU.

    Returns:
        The number of required GPUs to load the model.
    """
    # A partially used GPU still counts as a whole one, hence the round-up.
    gpus_needed = model_required_memory / gpu_memory
    return ceil(gpus_needed)

server_energy(generation_latency, server_power, server_gpu_count, gpu_required_count)

Compute the energy consumption of the server.

Parameters:

Name Type Description Default
generation_latency float

Token generation latency in seconds.

required
server_power float

Power consumption of the server in kW.

required
server_gpu_count int

Number of available GPUs in the server.

required
gpu_required_count int

Number of required GPUs to load the model.

required

Returns:

Type Description
float

The energy consumption of the server (GPUs are not included) in kWh.

Source code in ecologits/impacts/llm.py
@dag.asset
def server_energy(
    generation_latency: float,
    server_power: float,
    server_gpu_count: int,
    gpu_required_count: int,
) -> float:
    """
    Estimate the energy consumed by the server during the generation.

    The server energy over the generation time is attributed to the request
    in proportion to the share of the server's GPUs that the model occupies.

    Args:
        generation_latency: Token generation latency in seconds.
        server_power: Power consumption of the server in kW.
        server_gpu_count: Number of available GPUs in the server.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The energy consumption of the server (GPUs are not included) in kWh.
    """
    # Seconds -> hours so that kW * h yields kWh.
    duration_hours = generation_latency / 3600
    whole_server_energy = duration_hours * server_power
    return whole_server_energy * (gpu_required_count / server_gpu_count)

request_energy(datacenter_pue, server_energy, gpu_required_count, gpu_energy)

Compute the energy consumption of the request.

Parameters:

Name Type Description Default
datacenter_pue float

PUE of the datacenter.

required
server_energy float

Energy consumption of the server in kWh.

required
gpu_required_count int

Number of required GPUs to load the model.

required
gpu_energy float

Energy consumption of a single GPU in kWh.

required

Returns:

Type Description
float

The energy consumption of the request in kWh.

Source code in ecologits/impacts/llm.py
@dag.asset
def request_energy(
    datacenter_pue: float,
    server_energy: float,
    gpu_required_count: int,
    gpu_energy: float,
) -> float:
    """
    Estimate the total energy consumed to serve the request.

    The server energy and the energy of every required GPU are summed, then
    multiplied by the datacenter PUE.

    Args:
        datacenter_pue: PUE of the datacenter.
        server_energy: Energy consumption of the server in kWh.
        gpu_required_count: Number of required GPUs to load the model.
        gpu_energy: Energy consumption of a single GPU in kWh.

    Returns:
        The energy consumption of the request in kWh.
    """
    all_gpus_energy = gpu_required_count * gpu_energy
    hardware_energy = server_energy + all_gpus_energy
    return datacenter_pue * hardware_energy

request_usage_gwp(request_energy, if_electricity_mix_gwp)

Compute the Global Warming Potential (GWP) usage impact of the request.

Parameters:

Name Type Description Default
request_energy float

Energy consumption of the request in kWh.

required
if_electricity_mix_gwp float

GWP impact factor of electricity consumption in kgCO2eq / kWh.

required

Returns:

Type Description
float

The GWP usage impact of the request in kgCO2eq.

Source code in ecologits/impacts/llm.py
@dag.asset
def request_usage_gwp(
    request_energy: float,
    if_electricity_mix_gwp: float,
) -> float:
    """
    Convert the request energy into its Global Warming Potential (GWP) usage impact.

    Args:
        request_energy: Energy consumption of the request in kWh.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption in kgCO2eq / kWh.

    Returns:
        The GWP usage impact of the request in kgCO2eq.
    """
    usage_gwp = request_energy * if_electricity_mix_gwp
    return usage_gwp

request_usage_adpe(request_energy, if_electricity_mix_adpe)

Compute the Abiotic Depletion Potential for Elements (ADPe) usage impact of the request.

Parameters:

Name Type Description Default
request_energy float

Energy consumption of the request in kWh.

required
if_electricity_mix_adpe float

ADPe impact factor of electricity consumption in kgSbeq / kWh.

required

Returns:

Type Description
float

The ADPe usage impact of the request in kgSbeq.

Source code in ecologits/impacts/llm.py
@dag.asset
def request_usage_adpe(
    request_energy: float,
    if_electricity_mix_adpe: float,
) -> float:
    """
    Convert the request energy into its Abiotic Depletion Potential for Elements (ADPe) usage impact.

    Args:
        request_energy: Energy consumption of the request in kWh.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption in kgSbeq / kWh.

    Returns:
        The ADPe usage impact of the request in kgSbeq.
    """
    usage_adpe = request_energy * if_electricity_mix_adpe
    return usage_adpe

request_usage_pe(request_energy, if_electricity_mix_pe)

Compute the Primary Energy (PE) usage impact of the request.

Parameters:

Name Type Description Default
request_energy float

Energy consumption of the request in kWh.

required
if_electricity_mix_pe float

PE impact factor of electricity consumption in MJ / kWh.

required

Returns:

Type Description
float

The PE usage impact of the request in MJ.

Source code in ecologits/impacts/llm.py
@dag.asset
def request_usage_pe(
    request_energy: float,
    if_electricity_mix_pe: float,
) -> float:
    """
    Convert the request energy into its Primary Energy (PE) usage impact.

    Args:
        request_energy: Energy consumption of the request in kWh.
        if_electricity_mix_pe: PE impact factor of electricity consumption in MJ / kWh.

    Returns:
        The PE usage impact of the request in MJ.
    """
    usage_pe = request_energy * if_electricity_mix_pe
    return usage_pe

server_gpu_embodied_gwp(server_embodied_gwp, server_gpu_count, gpu_embodied_gwp, gpu_required_count)

Compute the Global Warming Potential (GWP) embodied impact of the server and the GPUs.

Parameters:

Name Type Description Default
server_embodied_gwp float

GWP embodied impact of the server in kgCO2eq.

required
server_gpu_count float

Number of available GPUs in the server.

required
gpu_embodied_gwp float

GWP embodied impact of a single GPU in kgCO2eq.

required
gpu_required_count int

Number of required GPUs to load the model.

required

Returns:

Type Description
float

The GWP embodied impact of the server and the GPUs in kgCO2eq.

Source code in ecologits/impacts/llm.py
@dag.asset
def server_gpu_embodied_gwp(
    server_embodied_gwp: float,
    server_gpu_count: float,
    gpu_embodied_gwp: float,
    gpu_required_count: int,
) -> float:
    """
    Compute the Global Warming Potential (GWP) embodied impact of the server and the GPUs.

    The server impact is weighted by the share of its GPUs that the model
    occupies; the impact of each required GPU is added on top.

    Args:
        server_embodied_gwp: GWP embodied impact of the server in kgCO2eq.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_gwp: GWP embodied impact of a single GPU in kgCO2eq.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The GWP embodied impact of the server and the GPUs in kgCO2eq.
    """
    server_share = gpu_required_count / server_gpu_count
    server_part = server_share * server_embodied_gwp
    gpus_part = gpu_required_count * gpu_embodied_gwp
    return server_part + gpus_part

server_gpu_embodied_adpe(server_embodied_adpe, server_gpu_count, gpu_embodied_adpe, gpu_required_count)

Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the server and the GPUs.

Parameters:

Name Type Description Default
server_embodied_adpe float

ADPe embodied impact of the server in kgSbeq.

required
server_gpu_count float

Number of available GPUs in the server.

required
gpu_embodied_adpe float

ADPe embodied impact of a single GPU in kgSbeq.

required
gpu_required_count int

Number of required GPUs to load the model.

required

Returns:

Type Description
float

The ADPe embodied impact of the server and the GPUs in kgSbeq.

Source code in ecologits/impacts/llm.py
@dag.asset
def server_gpu_embodied_adpe(
    server_embodied_adpe: float,
    server_gpu_count: float,
    gpu_embodied_adpe: float,
    gpu_required_count: int,
) -> float:
    """
    Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the server and the GPUs.

    The server impact is weighted by the share of its GPUs that the model
    occupies; the impact of each required GPU is added on top.

    Args:
        server_embodied_adpe: ADPe embodied impact of the server in kgSbeq.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_adpe: ADPe embodied impact of a single GPU in kgSbeq.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The ADPe embodied impact of the server and the GPUs in kgSbeq.
    """
    server_share = gpu_required_count / server_gpu_count
    server_part = server_share * server_embodied_adpe
    gpus_part = gpu_required_count * gpu_embodied_adpe
    return server_part + gpus_part

server_gpu_embodied_pe(server_embodied_pe, server_gpu_count, gpu_embodied_pe, gpu_required_count)

Compute the Primary Energy (PE) embodied impact of the server and the GPUs.

Parameters:

Name Type Description Default
server_embodied_pe float

PE embodied impact of the server in MJ.

required
server_gpu_count float

Number of available GPUs in the server.

required
gpu_embodied_pe float

PE embodied impact of a single GPU in MJ.

required
gpu_required_count int

Number of required GPUs to load the model.

required

Returns:

Type Description
float

The PE embodied impact of the server and the GPUs in MJ.

Source code in ecologits/impacts/llm.py
@dag.asset
def server_gpu_embodied_pe(
    server_embodied_pe: float,
    server_gpu_count: float,
    gpu_embodied_pe: float,
    gpu_required_count: int,
) -> float:
    """
    Compute the Primary Energy (PE) embodied impact of the server and the GPUs.

    The server impact is weighted by the share of its GPUs that the model
    occupies; the impact of each required GPU is added on top.

    Args:
        server_embodied_pe: PE embodied impact of the server in MJ.
        server_gpu_count: Number of available GPUs in the server.
        gpu_embodied_pe: PE embodied impact of a single GPU in MJ.
        gpu_required_count: Number of required GPUs to load the model.

    Returns:
        The PE embodied impact of the server and the GPUs in MJ.
    """
    server_share = gpu_required_count / server_gpu_count
    server_part = server_share * server_embodied_pe
    gpus_part = gpu_required_count * gpu_embodied_pe
    return server_part + gpus_part

request_embodied_gwp(server_gpu_embodied_gwp, server_lifetime, generation_latency)

Compute the Global Warming Potential (GWP) embodied impact of the request.

Parameters:

Name Type Description Default
server_gpu_embodied_gwp float

GWP embodied impact of the server and the GPUs in kgCO2eq.

required
server_lifetime float

Lifetime duration of the server in seconds.

required
generation_latency float

Token generation latency in seconds.

required

Returns:

Type Description
float

The GWP embodied impact of the request in kgCO2eq.

Source code in ecologits/impacts/llm.py
@dag.asset
def request_embodied_gwp(
    server_gpu_embodied_gwp: float,
    server_lifetime: float,
    generation_latency: float,
) -> float:
    """
    Attribute a share of the hardware's embodied Global Warming Potential (GWP) to the request.

    The share is the generation time divided by the server lifetime.

    Args:
        server_gpu_embodied_gwp: GWP embodied impact of the server and the GPUs in kgCO2eq.
        server_lifetime: Lifetime duration of the server in seconds.
        generation_latency: Token generation latency in seconds.

    Returns:
        The GWP embodied impact of the request in kgCO2eq.
    """
    lifetime_fraction = generation_latency / server_lifetime
    return lifetime_fraction * server_gpu_embodied_gwp

request_embodied_adpe(server_gpu_embodied_adpe, server_lifetime, generation_latency)

Compute the Abiotic Depletion Potential for Elements (ADPe) embodied impact of the request.

Parameters:

Name Type Description Default
server_gpu_embodied_adpe float

ADPe embodied impact of the server and the GPUs in kgSbeq.

required
server_lifetime float

Lifetime duration of the server in seconds.

required
generation_latency float

Token generation latency in seconds.

required

Returns:

Type Description
float

The ADPe embodied impact of the request in kgSbeq.

Source code in ecologits/impacts/llm.py
@dag.asset
def request_embodied_adpe(
    server_gpu_embodied_adpe: float,
    server_lifetime: float,
    generation_latency: float,
) -> float:
    """
    Attribute a share of the hardware's embodied Abiotic Depletion Potential for Elements (ADPe) to the request.

    The share is the generation time divided by the server lifetime.

    Args:
        server_gpu_embodied_adpe: ADPe embodied impact of the server and the GPUs in kgSbeq.
        server_lifetime: Lifetime duration of the server in seconds.
        generation_latency: Token generation latency in seconds.

    Returns:
        The ADPe embodied impact of the request in kgSbeq.
    """
    lifetime_fraction = generation_latency / server_lifetime
    return lifetime_fraction * server_gpu_embodied_adpe

request_embodied_pe(server_gpu_embodied_pe, server_lifetime, generation_latency)

Compute the Primary Energy (PE) embodied impact of the request.

Parameters:

Name Type Description Default
server_gpu_embodied_pe float

PE embodied impact of the server and the GPUs in MJ.

required
server_lifetime float

Lifetime duration of the server in seconds.

required
generation_latency float

Token generation latency in seconds.

required

Returns:

Type Description
float

The PE embodied impact of the request in MJ.

Source code in ecologits/impacts/llm.py
@dag.asset
def request_embodied_pe(
    server_gpu_embodied_pe: float,
    server_lifetime: float,
    generation_latency: float,
) -> float:
    """
    Attribute a share of the hardware's embodied Primary Energy (PE) to the request.

    The share is the generation time divided by the server lifetime.

    Args:
        server_gpu_embodied_pe: PE embodied impact of the server and the GPUs in MJ.
        server_lifetime: Lifetime duration of the server in seconds.
        generation_latency: Token generation latency in seconds.

    Returns:
        The PE embodied impact of the request in MJ.
    """
    lifetime_fraction = generation_latency / server_lifetime
    return lifetime_fraction * server_gpu_embodied_pe

compute_llm_impacts_dag(model_active_parameter_count, model_total_parameter_count, output_token_count, request_latency, if_electricity_mix_adpe, if_electricity_mix_pe, if_electricity_mix_gwp, model_quantization_bits=MODEL_QUANTIZATION_BITS, gpu_energy_alpha=GPU_ENERGY_ALPHA, gpu_energy_beta=GPU_ENERGY_BETA, gpu_latency_alpha=GPU_LATENCY_ALPHA, gpu_latency_beta=GPU_LATENCY_BETA, gpu_memory=GPU_MEMORY, gpu_embodied_gwp=GPU_EMBODIED_IMPACT_GWP, gpu_embodied_adpe=GPU_EMBODIED_IMPACT_ADPE, gpu_embodied_pe=GPU_EMBODIED_IMPACT_PE, server_gpu_count=SERVER_GPUS, server_power=SERVER_POWER, server_embodied_gwp=SERVER_EMBODIED_IMPACT_GWP, server_embodied_adpe=SERVER_EMBODIED_IMPACT_ADPE, server_embodied_pe=SERVER_EMBODIED_IMPACT_PE, server_lifetime=HARDWARE_LIFESPAN, datacenter_pue=DATACENTER_PUE)

Compute the impacts dag of an LLM generation request.

Parameters:

Name Type Description Default
model_active_parameter_count ValueOrRange

Number of active parameters of the model.

required
model_total_parameter_count ValueOrRange

Number of parameters of the model.

required
output_token_count float

Number of generated tokens.

required
request_latency float

Measured request latency in seconds.

required
if_electricity_mix_adpe float

ADPe impact factor of electricity consumption in kgSbeq / kWh (Antimony).

required
if_electricity_mix_pe float

PE impact factor of electricity consumption in MJ / kWh.

required
if_electricity_mix_gwp float

GWP impact factor of electricity consumption in kgCO2eq / kWh.

required
model_quantization_bits Optional[int]

Number of bits used to represent the model weights.

MODEL_QUANTIZATION_BITS
gpu_energy_alpha Optional[float]

Alpha parameter of the GPU linear power consumption profile.

GPU_ENERGY_ALPHA
gpu_energy_beta Optional[float]

Beta parameter of the GPU linear power consumption profile.

GPU_ENERGY_BETA
gpu_latency_alpha Optional[float]

Alpha parameter of the GPU linear latency profile.

GPU_LATENCY_ALPHA
gpu_latency_beta Optional[float]

Beta parameter of the GPU linear latency profile.

GPU_LATENCY_BETA
gpu_memory Optional[float]

Amount of memory available on a single GPU.

GPU_MEMORY
gpu_embodied_gwp Optional[float]

GWP embodied impact of a single GPU.

GPU_EMBODIED_IMPACT_GWP
gpu_embodied_adpe Optional[float]

ADPe embodied impact of a single GPU.

GPU_EMBODIED_IMPACT_ADPE
gpu_embodied_pe Optional[float]

PE embodied impact of a single GPU.

GPU_EMBODIED_IMPACT_PE
server_gpu_count Optional[int]

Number of available GPUs in the server.

SERVER_GPUS
server_power Optional[float]

Power consumption of the server in kW.

SERVER_POWER
server_embodied_gwp Optional[float]

GWP embodied impact of the server in kgCO2eq.

SERVER_EMBODIED_IMPACT_GWP
server_embodied_adpe Optional[float]

ADPe embodied impact of the server in kgSbeq.

SERVER_EMBODIED_IMPACT_ADPE
server_embodied_pe Optional[float]

PE embodied impact of the server in MJ.

SERVER_EMBODIED_IMPACT_PE
server_lifetime Optional[float]

Lifetime duration of the server in seconds.

HARDWARE_LIFESPAN
datacenter_pue Optional[float]

PUE of the datacenter.

DATACENTER_PUE

Returns:

Type Description
dict[str, float]

The impacts dag with all intermediate states.

Source code in ecologits/impacts/llm.py
def compute_llm_impacts_dag(
    model_active_parameter_count: ValueOrRange,
    model_total_parameter_count: ValueOrRange,
    output_token_count: float,
    request_latency: float,
    if_electricity_mix_adpe: float,
    if_electricity_mix_pe: float,
    if_electricity_mix_gwp: float,
    model_quantization_bits: Optional[int] = MODEL_QUANTIZATION_BITS,
    gpu_energy_alpha: Optional[float] = GPU_ENERGY_ALPHA,
    gpu_energy_beta: Optional[float] = GPU_ENERGY_BETA,
    gpu_latency_alpha: Optional[float] = GPU_LATENCY_ALPHA,
    gpu_latency_beta: Optional[float] = GPU_LATENCY_BETA,
    gpu_memory: Optional[float] = GPU_MEMORY,
    gpu_embodied_gwp: Optional[float] = GPU_EMBODIED_IMPACT_GWP,
    gpu_embodied_adpe: Optional[float] = GPU_EMBODIED_IMPACT_ADPE,
    gpu_embodied_pe: Optional[float] = GPU_EMBODIED_IMPACT_PE,
    server_gpu_count: Optional[int] = SERVER_GPUS,
    server_power: Optional[float] = SERVER_POWER,
    server_embodied_gwp: Optional[float] = SERVER_EMBODIED_IMPACT_GWP,
    server_embodied_adpe: Optional[float] = SERVER_EMBODIED_IMPACT_ADPE,
    server_embodied_pe: Optional[float] = SERVER_EMBODIED_IMPACT_PE,
    server_lifetime: Optional[float] = HARDWARE_LIFESPAN,
    datacenter_pue: Optional[float] = DATACENTER_PUE,
) -> dict[str, float]:
    """
    Run the impacts DAG for a single LLM generation request.

    Every argument is forwarded by name to ``dag.execute`` and the value it
    returns is handed back unchanged.

    Args:
        model_active_parameter_count: Number of active parameters of the model.
        model_total_parameter_count: Number of parameters of the model.
        output_token_count: Number of generated tokens.
        request_latency: Measured request latency in seconds.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption in kgSbeq / kWh (Antimony).
        if_electricity_mix_pe: PE impact factor of electricity consumption in MJ / kWh.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption in kgCO2eq / kWh.
        model_quantization_bits: Number of bits used to represent the model weights.
        gpu_energy_alpha: Alpha parameter of the GPU linear power consumption profile.
        gpu_energy_beta: Beta parameter of the GPU linear power consumption profile.
        gpu_latency_alpha: Alpha parameter of the GPU linear latency profile.
        gpu_latency_beta: Beta parameter of the GPU linear latency profile.
        gpu_memory: Amount of memory available on a single GPU.
        gpu_embodied_gwp: GWP embodied impact of a single GPU.
        gpu_embodied_adpe: ADPe embodied impact of a single GPU.
        gpu_embodied_pe: PE embodied impact of a single GPU.
        server_gpu_count: Number of available GPUs in the server.
        server_power: Power consumption of the server in kW.
        server_embodied_gwp: GWP embodied impact of the server in kgCO2eq.
        server_embodied_adpe: ADPe embodied impact of the server in kgSbeq.
        server_embodied_pe: PE embodied impact of the server in MJ.
        server_lifetime: Lifetime duration of the server in seconds.
        datacenter_pue: PUE of the datacenter.

    Returns:
        The impacts dag with all intermediate states.
    """
    # Root inputs of the DAG, keyed by the name under which they are passed
    # to ``dag.execute``.
    dag_inputs = {
        "model_active_parameter_count": model_active_parameter_count,
        "model_total_parameter_count": model_total_parameter_count,
        "model_quantization_bits": model_quantization_bits,
        "output_token_count": output_token_count,
        "request_latency": request_latency,
        "if_electricity_mix_gwp": if_electricity_mix_gwp,
        "if_electricity_mix_adpe": if_electricity_mix_adpe,
        "if_electricity_mix_pe": if_electricity_mix_pe,
        "gpu_energy_alpha": gpu_energy_alpha,
        "gpu_energy_beta": gpu_energy_beta,
        "gpu_latency_alpha": gpu_latency_alpha,
        "gpu_latency_beta": gpu_latency_beta,
        "gpu_memory": gpu_memory,
        "gpu_embodied_gwp": gpu_embodied_gwp,
        "gpu_embodied_adpe": gpu_embodied_adpe,
        "gpu_embodied_pe": gpu_embodied_pe,
        "server_gpu_count": server_gpu_count,
        "server_power": server_power,
        "server_embodied_gwp": server_embodied_gwp,
        "server_embodied_adpe": server_embodied_adpe,
        "server_embodied_pe": server_embodied_pe,
        "server_lifetime": server_lifetime,
        "datacenter_pue": datacenter_pue,
    }
    return dag.execute(**dag_inputs)

compute_llm_impacts(model_active_parameter_count, model_total_parameter_count, output_token_count, if_electricity_mix_adpe, if_electricity_mix_pe, if_electricity_mix_gwp, request_latency=None, **kwargs)

Compute the impacts of an LLM generation request.

Parameters:

Name Type Description Default
model_active_parameter_count ValueOrRange

Number of active parameters of the model.

required
model_total_parameter_count ValueOrRange

Number of total parameters of the model.

required
output_token_count float

Number of generated tokens.

required
if_electricity_mix_adpe float

ADPe impact factor of electricity consumption in kgSbeq / kWh (Antimony).

required
if_electricity_mix_pe float

PE impact factor of electricity consumption in MJ / kWh.

required
if_electricity_mix_gwp float

GWP impact factor of electricity consumption in kgCO2eq / kWh.

required
request_latency Optional[float]

Measured request latency in seconds.

None
**kwargs Any

Any other optional parameter.

{}

Returns:

Type Description
Impacts

The impacts of an LLM generation request.

Source code in ecologits/impacts/llm.py
def compute_llm_impacts(
    model_active_parameter_count: ValueOrRange,
    model_total_parameter_count: ValueOrRange,
    output_token_count: float,
    if_electricity_mix_adpe: float,
    if_electricity_mix_pe: float,
    if_electricity_mix_gwp: float,
    request_latency: Optional[float] = None,
    **kwargs: Any
) -> Impacts:
    """
    Compute the impacts of an LLM generation request.

    When either parameter count is a ``RangeValue``, the impacts DAG is run
    twice (once with the ``min`` bounds, once with the ``max`` bounds) and
    each reported quantity becomes a ``RangeValue`` spanning both results.
    Otherwise the DAG is run once and plain values are reported.

    Args:
        model_active_parameter_count: Number of active parameters of the model.
        model_total_parameter_count: Number of total parameters of the model.
        output_token_count: Number of generated tokens.
        if_electricity_mix_adpe: ADPe impact factor of electricity consumption in kgSbeq / kWh (Antimony).
        if_electricity_mix_pe: PE impact factor of electricity consumption in MJ / kWh.
        if_electricity_mix_gwp: GWP impact factor of electricity consumption in kgCO2eq / kWh.
        request_latency: Measured request latency in seconds. When ``None``,
            ``math.inf`` is used instead.
        **kwargs: Any other optional parameter, forwarded to ``compute_llm_impacts_dag``.

    Returns:
        The impacts of an LLM generation request.

    Raises:
        TypeError: If a quantity that is already a ``RangeValue`` would have to
            be merged with a further result.
    """
    latency_bound = math.inf if request_latency is None else request_latency

    active_is_range = isinstance(model_active_parameter_count, RangeValue)
    total_is_range = isinstance(model_total_parameter_count, RangeValue)

    if active_is_range or total_is_range:
        # Two scenarios (lower / upper); a scalar count is repeated so that
        # both lists stay aligned for ``zip``.
        if active_is_range:
            active_scenarios = [model_active_parameter_count.min, model_active_parameter_count.max]
        else:
            active_scenarios = [model_active_parameter_count, model_active_parameter_count]
        if total_is_range:
            total_scenarios = [model_total_parameter_count.min, model_total_parameter_count.max]
        else:
            total_scenarios = [model_total_parameter_count, model_total_parameter_count]
    else:
        active_scenarios = [model_active_parameter_count]
        total_scenarios = [model_total_parameter_count]

    # DAG outputs that are surfaced in the returned ``Impacts``.
    reported_fields = (
        "request_energy",
        "request_usage_gwp",
        "request_usage_adpe",
        "request_usage_pe",
        "request_embodied_gwp",
        "request_embodied_adpe",
        "request_embodied_pe",
    )
    merged: dict[str, Union[RangeValue, float, int]] = {}
    for active_count, total_count in zip(active_scenarios, total_scenarios):
        scenario = compute_llm_impacts_dag(
            model_active_parameter_count=active_count,
            model_total_parameter_count=total_count,
            output_token_count=output_token_count,
            request_latency=latency_bound,
            if_electricity_mix_adpe=if_electricity_mix_adpe,
            if_electricity_mix_pe=if_electricity_mix_pe,
            if_electricity_mix_gwp=if_electricity_mix_gwp,
            **kwargs
        )
        for name in reported_fields:
            if name not in merged:
                # First scenario: keep the plain value.
                merged[name] = scenario[name]
                continue
            previous = merged[name]
            if not isinstance(previous, (float, int)):
                raise TypeError("Cannot transform RangeValue.")
            # Second scenario: earlier value is the lower bound, new one the upper.
            merged[name] = RangeValue(min=previous, max=scenario[name])

    energy = Energy(value=merged["request_energy"])
    gwp_usage = GWP(value=merged["request_usage_gwp"])
    adpe_usage = ADPe(value=merged["request_usage_adpe"])
    pe_usage = PE(value=merged["request_usage_pe"])
    gwp_embodied = GWP(value=merged["request_embodied_gwp"])
    adpe_embodied = ADPe(value=merged["request_embodied_adpe"])
    pe_embodied = PE(value=merged["request_embodied_pe"])

    # Top-level totals are usage + embodied; the two breakdowns are attached too.
    return Impacts(
        energy=energy,
        gwp=gwp_usage + gwp_embodied,
        adpe=adpe_usage + adpe_embodied,
        pe=pe_usage + pe_embodied,
        usage=Usage(energy=energy, gwp=gwp_usage, adpe=adpe_usage, pe=pe_usage),
        embodied=Embodied(gwp=gwp_embodied, adpe=adpe_embodied, pe=pe_embodied),
    )