You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

73 lines
2.0 KiB

import io
import os
import pytest
from nltk.util import filestring
def test_reads_allowed_file(tmp_path):
    """Check that filestring returns the text of a file inside allowed_dir."""
    sandbox = tmp_path / "allowed"
    sandbox.mkdir()
    target = sandbox / "example.txt"
    target.write_text("hello world")

    # Both the file path and the allowed directory are passed as plain strings.
    assert filestring(str(target), allowed_dir=str(sandbox)) == "hello world"
def test_rejects_parent_traversal(tmp_path):
    """Check that a '../' path leading out of allowed_dir raises PermissionError."""
    sandbox = tmp_path / "allowed"
    sandbox.mkdir()
    # The file the traversal tries to reach lives next to, not inside, the sandbox.
    (tmp_path / "secret.txt").write_text("topsecret")

    # Build <sandbox>/../secret.txt; pathlib keeps the ".." component verbatim.
    escaping_path = str(sandbox.joinpath("..", "secret.txt"))

    with pytest.raises(PermissionError):
        filestring(escaping_path, allowed_dir=str(sandbox))
def test_rejects_symlink_escape(tmp_path):
    """Check that a symlink in allowed_dir pointing outside raises PermissionError."""
    sandbox = tmp_path / "allowed"
    sandbox.mkdir()
    target = tmp_path / "outside.txt"
    target.write_text("hidden-data")
    escape_link = sandbox / "link.txt"

    # Symlink creation can fail (e.g. on Windows without elevated rights);
    # skip the test in that case rather than reporting a failure.
    try:
        escape_link.symlink_to(target)
    except (OSError, NotImplementedError):
        pytest.skip("Symlink creation not supported on this platform")

    with pytest.raises(PermissionError):
        filestring(str(escape_link), allowed_dir=str(sandbox))
def test_preserves_file_like_objects():
    """Check that a stream object passed without allowed_dir is read as before."""
    expected = "stream-data"
    # No allowed_dir is supplied here: only the stream itself is handed over.
    result = filestring(io.StringIO(expected))
    assert result == expected
def test_encoding_fallback(tmp_path):
    """Check that a file containing bytes that are not valid UTF-8 still yields a str."""
    sandbox = tmp_path / "allowed"
    sandbox.mkdir()
    latin1_file = sandbox / "latin1.txt"
    # A lone trailing 0xE9 byte is not a valid UTF-8 sequence.
    latin1_file.write_bytes(b"caf\xe9")

    decoded = filestring(str(latin1_file), allowed_dir=str(sandbox))

    assert isinstance(decoded, str)
    # Only the ASCII prefix must survive; presumably filestring decodes with
    # errors="ignore" -- confirm against the implementation.
    assert "caf" in decoded