Fix issues with getting id (#3556)

* fix issues with getting id

* ignore linter

* fix: resolve ruff linting issues in tracing utils

---------

Co-authored-by: Greyson LaLonde <greyson.r.lalonde@gmail.com>
This commit is contained in:
João Moura
2025-09-20 15:29:25 -03:00
committed by GitHub
parent 7426969736
commit 4951d30dd9
2 changed files with 272 additions and 28 deletions

View File

@@ -54,44 +54,164 @@ def _get_machine_id() -> str:
[f"{(uuid.getnode() >> b) & 0xFF:02x}" for b in range(0, 12, 2)][::-1]
)
parts.append(mac)
except Exception:
logger.warning("Error getting machine id for fingerprinting")
except Exception: # noqa: S110
pass
sysname = platform.system()
parts.append(sysname)
try:
sysname = platform.system()
parts.append(sysname)
except Exception:
sysname = "unknown"
parts.append(sysname)
try:
if sysname == "Darwin":
res = subprocess.run(
["/usr/sbin/system_profiler", "SPHardwareDataType"],
capture_output=True,
text=True,
timeout=2,
)
m = re.search(r"Hardware UUID:\s*([A-Fa-f0-9\-]+)", res.stdout)
if m:
parts.append(m.group(1))
elif sysname == "Linux":
try:
parts.append(Path("/etc/machine-id").read_text().strip())
except Exception:
parts.append(Path("/sys/class/dmi/id/product_uuid").read_text().strip())
res = subprocess.run(
["/usr/sbin/system_profiler", "SPHardwareDataType"],
capture_output=True,
text=True,
timeout=2,
)
m = re.search(r"Hardware UUID:\s*([A-Fa-f0-9\-]+)", res.stdout)
if m:
parts.append(m.group(1))
except Exception: # noqa: S110
pass
elif sysname == "Linux":
linux_id = _get_linux_machine_id()
if linux_id:
parts.append(linux_id)
elif sysname == "Windows":
res = subprocess.run(
["C:\\Windows\\System32\\wbem\\wmic.exe", "csproduct", "get", "UUID"],
capture_output=True,
text=True,
timeout=2,
)
lines = [line.strip() for line in res.stdout.splitlines() if line.strip()]
if len(lines) >= 2:
parts.append(lines[1])
except Exception:
logger.exception("Error getting machine ID")
try:
res = subprocess.run(
[
"C:\\Windows\\System32\\wbem\\wmic.exe",
"csproduct",
"get",
"UUID",
],
capture_output=True,
text=True,
timeout=2,
)
lines = [
line.strip() for line in res.stdout.splitlines() if line.strip()
]
if len(lines) >= 2:
parts.append(lines[1])
except Exception: # noqa: S110
pass
else:
generic_id = _get_generic_system_id()
if generic_id:
parts.append(generic_id)
except Exception: # noqa: S110
pass
if len(parts) <= 1:
try:
import socket
parts.append(socket.gethostname())
except Exception: # noqa: S110
pass
try:
parts.append(getpass.getuser())
except Exception: # noqa: S110
pass
try:
parts.append(platform.machine())
parts.append(platform.processor())
except Exception: # noqa: S110
pass
if not parts:
parts.append("unknown-system")
parts.append(str(uuid.uuid4()))
return hashlib.sha256("".join(parts).encode()).hexdigest()
def _get_linux_machine_id() -> str | None:
linux_id_sources = [
"/etc/machine-id",
"/sys/class/dmi/id/product_uuid",
"/proc/sys/kernel/random/boot_id",
"/sys/class/dmi/id/board_serial",
"/sys/class/dmi/id/chassis_serial",
]
for source in linux_id_sources:
try:
path = Path(source)
if path.exists() and path.is_file():
content = path.read_text().strip()
if content and content.lower() not in [
"unknown",
"to be filled by o.e.m.",
"",
]:
return content
except Exception: # noqa: S112, PERF203
continue
try:
import socket
hostname = socket.gethostname()
arch = platform.machine()
if hostname and arch:
return f"{hostname}-{arch}"
except Exception: # noqa: S110
pass
return None
def _get_generic_system_id() -> str | None:
try:
parts = []
try:
import socket
hostname = socket.gethostname()
if hostname:
parts.append(hostname)
except Exception: # noqa: S110
pass
try:
parts.append(platform.machine())
parts.append(platform.processor())
parts.append(platform.architecture()[0])
except Exception: # noqa: S110
pass
try:
container_id = os.environ.get(
"HOSTNAME", os.environ.get("CONTAINER_ID", "")
)
if container_id:
parts.append(container_id)
except Exception: # noqa: S110
pass
if parts:
return "-".join(filter(None, parts))
except Exception: # noqa: S110
pass
return None
def _user_data_file() -> Path:
base = Path(db_storage_path())
base.mkdir(parents=True, exist_ok=True)

View File

@@ -0,0 +1,124 @@
"""Tests for the machine ID generation functionality in tracing utils."""
from pathlib import Path
from unittest.mock import patch
from crewai.events.listeners.tracing.utils import (
_get_generic_system_id,
_get_linux_machine_id,
_get_machine_id,
)
def test_get_machine_id_basic():
    """Check that _get_machine_id yields a SHA256-style hex digest string."""
    digest = _get_machine_id()

    # A SHA256 hexdigest is a 64-character lowercase hexadecimal string.
    assert isinstance(digest, str)
    assert len(digest) == 64
    assert set(digest) <= set("0123456789abcdef")
def test_get_machine_id_handles_missing_files():
    """Check that _get_machine_id copes with Path.read_text raising FileNotFoundError."""
    missing_files = patch.object(Path, "read_text", side_effect=FileNotFoundError)
    with missing_files:
        digest = _get_machine_id()

    # A valid digest must still come back when every file read fails.
    assert isinstance(digest, str)
    assert len(digest) == 64
    assert set(digest) <= set("0123456789abcdef")
def test_get_machine_id_handles_permission_errors():
    """Check that _get_machine_id copes with Path.read_text raising PermissionError."""
    unreadable_files = patch.object(Path, "read_text", side_effect=PermissionError)
    with unreadable_files:
        digest = _get_machine_id()

    # A valid digest must still come back when file reads are denied.
    assert isinstance(digest, str)
    assert len(digest) == 64
    assert set(digest) <= set("0123456789abcdef")
def test_get_machine_id_handles_mac_address_failure():
    """Check that _get_machine_id still works when uuid.getnode raises."""
    broken_getnode = patch("uuid.getnode", side_effect=Exception("MAC address error"))
    with broken_getnode:
        digest = _get_machine_id()

    # The MAC address is optional input; the digest must still be valid.
    assert isinstance(digest, str)
    assert len(digest) == 64
    assert set(digest) <= set("0123456789abcdef")
def test_get_linux_machine_id_handles_missing_files():
    """Check that _get_linux_machine_id tolerates every candidate path being absent."""
    nothing_exists = patch.object(Path, "exists", return_value=False)
    with nothing_exists:
        linux_id = _get_linux_machine_id()

    # Either the hostname-arch fallback string or None is acceptable.
    assert isinstance(linux_id, (str, type(None)))
def test_get_linux_machine_id_handles_file_read_errors():
    """Check that _get_linux_machine_id tolerates files that exist but cannot be read."""
    # Every path looks like a regular file, yet reading it always fails.
    with patch.object(Path, "exists", return_value=True):
        with patch.object(Path, "is_file", return_value=True):
            with patch.object(Path, "read_text", side_effect=FileNotFoundError):
                linux_id = _get_linux_machine_id()

    # Either the hostname-based fallback string or None is acceptable.
    assert isinstance(linux_id, (str, type(None)))
def test_get_generic_system_id_basic():
    """Check that _get_generic_system_id returns a string or None."""
    generic_id = _get_generic_system_id()

    assert isinstance(generic_id, (str, type(None)))
    # A truthy result must carry at least one character.
    assert not generic_id or len(generic_id) > 0
def test_get_generic_system_id_handles_socket_errors():
    """Check that _get_generic_system_id tolerates socket.gethostname raising."""
    broken_hostname = patch("socket.gethostname", side_effect=Exception("Socket error"))
    with broken_hostname:
        generic_id = _get_generic_system_id()

    # The call must not raise; a string or None are both acceptable.
    assert isinstance(generic_id, (str, type(None)))
def test_machine_id_consistency():
    """Check that two consecutive _get_machine_id calls return the same value."""
    first_call, second_call = _get_machine_id(), _get_machine_id()

    # The fingerprint is expected to be stable from one call to the next.
    assert first_call == second_call
def test_machine_id_always_has_fallback():
    """Check that _get_machine_id returns a digest even when every patched source fails."""
    # Make each patched lookup raise: MAC address, OS name, hostname,
    # user name, machine/processor info, and all file reads.
    with (
        patch("uuid.getnode", side_effect=Exception),
        patch("platform.system", side_effect=Exception),
        patch("socket.gethostname", side_effect=Exception),
        patch("getpass.getuser", side_effect=Exception),
        patch("platform.machine", side_effect=Exception),
        patch("platform.processor", side_effect=Exception),
        patch.object(Path, "read_text", side_effect=FileNotFoundError),
    ):
        digest = _get_machine_id()

    # Even with nothing to fingerprint, the result must be a valid digest.
    assert isinstance(digest, str)
    assert len(digest) == 64
    assert set(digest) <= set("0123456789abcdef")