From 46757ff063d7f6657e7eeef17350f08aa0f1e42f Mon Sep 17 00:00:00 2001
From: Nina Stawski <me@ninastawski.com>
Date: Tue, 23 May 2023 22:02:26 -0700
Subject: [PATCH] Use BoundedAttributes instead of raw dict to extract
 attributes from LogRecord #3114 (#3310)

Co-authored-by: Diego Hurtado <ocelotl@users.noreply.github.com>
Co-authored-by: OpenTelemetry Bot <107717825+opentelemetrybot@users.noreply.github.com>
---
 CHANGELOG.md                                  |   2 +-
 .../src/opentelemetry/sdk/_logs/__init__.py   |   2 +
 .../sdk/_logs/_internal/__init__.py           | 119 +++++++++++++++++-
 opentelemetry-sdk/tests/logs/test_handler.py  |   2 +
 .../tests/logs/test_log_limits.py             |  40 ++++++
 .../tests/logs/test_log_record.py             |  67 +++++++++-
 6 files changed, 228 insertions(+), 4 deletions(-)
 create mode 100644 opentelemetry-sdk/tests/logs/test_log_limits.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a589687295..3ce6fc4251 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## Unreleased
-
+- Use BoundedAttributes instead of raw dict to extract attributes from LogRecord and support dropped_attributes_count in LogRecord ([#3310](https://github.com/open-telemetry/opentelemetry-python/pull/3310))
 ## Version 1.18.0/0.39b0 (2023-05-04)
 
 - Select histogram aggregation with an environment variable
diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/_logs/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/_logs/__init__.py
index a86fbaee0f..881bb9a4b2 100644
--- a/opentelemetry-sdk/src/opentelemetry/sdk/_logs/__init__.py
+++ b/opentelemetry-sdk/src/opentelemetry/sdk/_logs/__init__.py
@@ -18,6 +18,7 @@
     Logger,
     LoggerProvider,
     LoggingHandler,
+    LogLimits,
     LogRecord,
     LogRecordProcessor,
 )
@@ -27,6 +28,7 @@
     "Logger",
     "LoggerProvider",
     "LoggingHandler",
+    "LogLimits",
     "LogRecord",
     "LogRecordProcessor",
 ]
diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py
index eda9b093c9..7410138067 100644
--- a/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py
+++ b/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py
@@ -19,6 +19,7 @@
 import logging
 import threading
 import traceback
+from os import environ
 from time import time_ns
 from typing import Any, Callable, Optional, Tuple, Union
 
@@ -31,6 +32,11 @@
     get_logger_provider,
     std_to_otel,
 )
+from opentelemetry.attributes import BoundedAttributes
+from opentelemetry.sdk.environment_variables import (
+    OTEL_ATTRIBUTE_COUNT_LIMIT,
+    OTEL_ATTRIBUTE_VALUE_LENGTH_LIMIT,
+)
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.util import ns_to_iso_str
 from opentelemetry.sdk.util.instrumentation import InstrumentationScope
@@ -45,6 +51,101 @@
 
 _logger = logging.getLogger(__name__)
 
+_DEFAULT_OTEL_ATTRIBUTE_COUNT_LIMIT = 128  # fallback max_attributes
+_ENV_VALUE_UNSET = ""  # an empty env value resolves the limit to None
+
+
+class LogLimits:
+    """This class is based on the SpanLimits class in the Tracing module.
+
+    This class represents the limits that should be enforced on the attributes of a log record.
+
+    This class does not enforce any limits itself. It only provides a way to read limits from env,
+    default values and from user provided arguments.
+
+    All limit arguments must be either a non-negative integer, ``None`` or ``LogLimits.UNSET``.
+
+    - All limit arguments are optional.
+    - If a limit argument is not set, the class will try to read its value from the corresponding
+      environment variable.
+    - If the environment variable is not set, the default value, if any, will be used.
+
+    Limit precedence:
+
+    - An explicit non-negative integer argument is used as given.
+    - ``LogLimits.UNSET`` skips the environment variable and resolves to ``None``.
+    - ``None`` falls back to the environment variable; an empty value resolves to ``None``.
+    - A ``max_attributes`` that resolves to ``None`` is replaced by its default value.
+
+    Args:
+        max_attributes: Maximum number of attributes that can be added to a log record.
+            Environment variable: ``OTEL_ATTRIBUTE_COUNT_LIMIT``
+            Default: 128 (``_DEFAULT_OTEL_ATTRIBUTE_COUNT_LIMIT``)
+        max_attribute_length: Maximum length an attribute value can have; longer values are
+            truncated. Environment variable: ``OTEL_ATTRIBUTE_VALUE_LENGTH_LIMIT``
+    """
+
+    UNSET = -1  # sentinel: do not consult the environment variable
+
+    def __init__(
+        self,
+        max_attributes: Optional[int] = None,
+        max_attribute_length: Optional[int] = None,
+    ):
+        """Resolve both limits; raises ``ValueError`` for an invalid limit."""
+        # attribute count
+        global_max_attributes = self._from_env_if_absent(
+            max_attributes, OTEL_ATTRIBUTE_COUNT_LIMIT
+        )
+        self.max_attributes = (
+            global_max_attributes
+            if global_max_attributes is not None
+            else _DEFAULT_OTEL_ATTRIBUTE_COUNT_LIMIT
+        )
+
+        # attribute length
+        self.max_attribute_length = self._from_env_if_absent(
+            max_attribute_length,
+            OTEL_ATTRIBUTE_VALUE_LENGTH_LIMIT,
+        )
+
+    def __repr__(self):
+        return f"{type(self).__name__}(max_attributes={self.max_attributes}, max_attribute_length={self.max_attribute_length})"
+
+    @classmethod
+    def _from_env_if_absent(
+        cls, value: Optional[int], env_var: str, default: Optional[int] = None
+    ) -> Optional[int]:
+        """Pick the limit from ``value``, ``env_var`` or ``default``.
+
+        ``UNSET`` and an empty env value yield ``None``; a negative or
+        non-integer limit raises ``ValueError``.
+        """
+        if value == cls.UNSET:
+            return None
+        err_msg = "{} must be a non-negative integer but got {}"
+        # if no value is provided for the limit, try to load it from env
+        if value is None:
+            # return default value if env var is not set
+            if env_var not in environ:
+                return default
+            str_value = environ.get(env_var, "").strip().lower()
+            if str_value == _ENV_VALUE_UNSET:
+                return None
+            try:
+                value = int(str_value)
+            except ValueError as err:
+                raise ValueError(err_msg.format(env_var, str_value)) from err
+        if value < 0:
+            raise ValueError(err_msg.format(env_var, value))
+        return value
+
+
+_UnsetLogLimits = LogLimits(  # default ``limits`` of LogRecord.__init__
+    max_attributes=LogLimits.UNSET,  # resolves to the default count limit
+    max_attribute_length=LogLimits.UNSET,  # resolves to None
+)
+
 
 class LogRecord(APILogRecord):
     """A LogRecord instance represents an event being logged.
@@ -66,6 +167,7 @@ def __init__(
         body: Optional[Any] = None,
         resource: Optional[Resource] = None,
         attributes: Optional[Attributes] = None,
+        limits: Optional[LogLimits] = _UnsetLogLimits,
     ):
         super().__init__(
             **{
@@ -77,7 +179,12 @@ def __init__(
                 "severity_text": severity_text,
                 "severity_number": severity_number,
                 "body": body,
-                "attributes": attributes,
+                "attributes": BoundedAttributes(
+                    maxlen=limits.max_attributes,
+                    attributes=attributes if bool(attributes) else None,
+                    immutable=False,
+                    max_value_len=limits.max_attribute_length,
+                ),
             }
         )
         self.resource = resource
@@ -93,7 +200,9 @@ def to_json(self, indent=4) -> str:
                 "body": self.body,
                 "severity_number": repr(self.severity_number),
                 "severity_text": self.severity_text,
-                "attributes": self.attributes,
+                "attributes": dict(self.attributes)
+                if bool(self.attributes)
+                else None,
                 "timestamp": ns_to_iso_str(self.timestamp),
                 "trace_id": f"0x{format_trace_id(self.trace_id)}"
                 if self.trace_id is not None
@@ -109,6 +218,12 @@ def to_json(self, indent=4) -> str:
             indent=indent,
         )
 
+    @property
+    def dropped_attributes(self) -> int:
+        """Number of attributes dropped by the limits (0 if unknown)."""
+        # Truthiness would hide drops when every attribute was dropped.
+        return getattr(self.attributes, "dropped", 0)
+
 
 class LogData:
     """Readable LogRecord data plus associated InstrumentationLibrary."""
diff --git a/opentelemetry-sdk/tests/logs/test_handler.py b/opentelemetry-sdk/tests/logs/test_handler.py
index b9c40608e1..04cf5640f5 100644
--- a/opentelemetry-sdk/tests/logs/test_handler.py
+++ b/opentelemetry-sdk/tests/logs/test_handler.py
@@ -17,6 +17,7 @@
 
 from opentelemetry._logs import SeverityNumber
 from opentelemetry._logs import get_logger as APIGetLogger
+from opentelemetry.attributes import BoundedAttributes
 from opentelemetry.sdk import trace
 from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
 from opentelemetry.semconv.trace import SpanAttributes
@@ -91,6 +92,7 @@ def test_log_record_user_attributes(self):
 
         self.assertIsNotNone(log_record)
         self.assertEqual(log_record.attributes, {"http.status_code": 200})
+        self.assertTrue(isinstance(log_record.attributes, BoundedAttributes))
 
     def test_log_record_exception(self):
         """Exception information will be included in attributes"""
diff --git a/opentelemetry-sdk/tests/logs/test_log_limits.py b/opentelemetry-sdk/tests/logs/test_log_limits.py
new file mode 100644
index 0000000000..c2135b6569
--- /dev/null
+++ b/opentelemetry-sdk/tests/logs/test_log_limits.py
@@ -0,0 +1,40 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from opentelemetry.sdk._logs import LogLimits
+from opentelemetry.sdk._logs._internal import (
+    _DEFAULT_OTEL_ATTRIBUTE_COUNT_LIMIT,
+)
+
+
+class TestLogLimits(unittest.TestCase):
+    def test_log_limits_repr_unset(self):
+        # with no arguments only the attribute count receives a default
+        rendered = str(LogLimits())
+        wanted = f"LogLimits(max_attributes={_DEFAULT_OTEL_ATTRIBUTE_COUNT_LIMIT}, max_attribute_length=None)"
+        self.assertEqual(wanted, rendered)
+
+    def test_log_limits_max_attributes(self):
+        # an explicit attribute count is stored as given
+        configured = LogLimits(max_attributes=1)
+
+        self.assertEqual(1, configured.max_attributes)
+
+    def test_log_limits_max_attribute_length(self):
+        # an explicit value length is stored as given
+        configured = LogLimits(max_attribute_length=1)
+
+        self.assertEqual(1, configured.max_attribute_length)
diff --git a/opentelemetry-sdk/tests/logs/test_log_record.py b/opentelemetry-sdk/tests/logs/test_log_record.py
index 7142408c4c..a5993e5833 100644
--- a/opentelemetry-sdk/tests/logs/test_log_record.py
+++ b/opentelemetry-sdk/tests/logs/test_log_record.py
@@ -15,7 +15,8 @@
 import json
 import unittest
 
-from opentelemetry.sdk._logs import LogRecord
+from opentelemetry.attributes import BoundedAttributes
+from opentelemetry.sdk._logs import LogLimits, LogRecord
 
 
 class TestLogRecord(unittest.TestCase):
@@ -39,3 +40,67 @@ def test_log_record_to_json(self):
             body="a log line",
         ).to_json()
         self.assertEqual(expected, actual)
+
+    def test_log_record_bounded_attributes(self):
+        """A plain dict of attributes is wrapped in BoundedAttributes."""
+        attr = {"key": "value"}
+
+        result = LogRecord(timestamp=0, body="a log line", attributes=attr)
+        self.assertIsInstance(result.attributes, BoundedAttributes)
+
+    def test_log_record_dropped_attributes_empty_limits(self):
+        """With the default ``limits`` argument nothing is dropped."""
+        attr = {"key": "value"}
+
+        result = LogRecord(timestamp=0, body="a log line", attributes=attr)
+        self.assertEqual(0, result.dropped_attributes)
+
+    def test_log_record_dropped_attributes_set_limits_max_attribute(self):
+        """One of two attributes is dropped when max_attributes is 1."""
+        attr = {"key": "value", "key2": "value2"}
+        limits = LogLimits(max_attributes=1)
+
+        result = LogRecord(
+            timestamp=0, body="a log line", attributes=attr, limits=limits
+        )
+
+        self.assertEqual(1, result.dropped_attributes)
+
+    def test_log_record_dropped_attributes_set_limits_max_attribute_length(
+        self,
+    ):
+        """Capping only the value length truncates values without drops."""
+        attr = {"key": "value", "key2": "value2"}
+        expected = {"key": "v", "key2": "v"}
+        limits = LogLimits(max_attribute_length=1)
+
+        result = LogRecord(
+            timestamp=0, body="a log line", attributes=attr, limits=limits
+        )
+
+        self.assertEqual(0, result.dropped_attributes)
+        self.assertEqual(expected, result.attributes)
+
+    def test_log_record_dropped_attributes_set_limits(self):
+        """Both limits set: one attribute dropped, the other truncated."""
+        attr = {"key": "value", "key2": "value2"}
+        expected = {"key2": "v"}
+        limits = LogLimits(
+            max_attributes=1,
+            max_attribute_length=1,
+        )
+        result = LogRecord(
+            timestamp=0, body="a log line", attributes=attr, limits=limits
+        )
+        self.assertEqual(1, result.dropped_attributes)
+        self.assertEqual(expected, result.attributes)
+
+    def test_log_record_dropped_attributes_unset_limits(self):
+        """Default ``LogLimits()`` leaves two short attributes untouched."""
+        attr = {"key": "value", "key2": "value2"}
+        limits = LogLimits()
+        result = LogRecord(
+            timestamp=0, body="a log line", attributes=attr, limits=limits
+        )
+        self.assertEqual(0, result.dropped_attributes)
+        self.assertEqual(attr, result.attributes)