Skip to content

.activity_pub.muck_out

cattle_grid.activity_pub.muck_out

This package contains tools to turn ActivityPub messages into something cleaner. Its tasks include

  • Normalization
  • Validation

attachment

normalize_attachment(attachment)

Normalizes an attachment

Source code in cattle_grid/activity_pub/muck_out/attachment.py
def normalize_attachment(attachment: dict) -> dict:
    """Normalizes an attachment

    Attachments of type ``Document`` are retyped according to the major
    part of their ``mediaType``: ``image/*`` becomes ``Image``,
    ``audio/*`` becomes ``Audio`` and ``video/*`` becomes ``Video``.
    Anything else is returned untouched.

    The attachment is modified in place and the same dict is returned.

    ```pycon
    >>> normalize_attachment({"type": "Document", "mediaType": "image/png"})
    {'type': 'Image', 'mediaType': 'image/png'}

    >>> normalize_attachment({"type": "Document", "mediaType": "text/plain"})
    {'type': 'Document', 'mediaType': 'text/plain'}

    ```

    :param attachment: The attachment to normalize
    :returns: The (possibly retyped) attachment
    """

    if attachment.get("type") != "Document":
        return attachment

    media_type = attachment.get("mediaType")
    # The attachment comes from remote input, so mediaType may be missing or
    # not a string; in both cases there is nothing to infer the type from.
    if not isinstance(media_type, str):
        return attachment

    prefix_to_type = (
        ("image/", "Image"),
        ("audio/", "Audio"),
        ("video/", "Video"),
    )
    for prefix, normalized_type in prefix_to_type:
        if media_type.startswith(prefix):
            attachment["type"] = normalized_type
            break

    return attachment

normalize

Routines to normalize an ActivityPub activity

normalize_activity(activity, actor=None)

Normalizes activities.

Parameters:

Name Type Description Default
activity dict

The activity being normalized

required
actor str | None

Actor receiving this activity

None

Returns:

Type Description
Activity
Source code in cattle_grid/activity_pub/muck_out/normalize.py
def normalize_activity(activity: dict, actor: str | None = None) -> Activity:
    """
    Normalizes activities.

    An embedded ``object`` given as a dict is run through
    ``normalize_object``; if that fails, the object is replaced by the
    value of its ``id`` key. The remaining fields are collected and handed
    to ``Activity.model_validate``.

    Any exception raised along the way is logged together with the
    activity and then re-raised.

    :param activity: The activity being normalized
    :param actor: Actor receiving this activity
    :returns: The validated activity
    """
    try:
        fetch = activity.get

        embedded = fetch("object")
        if isinstance(embedded, dict):
            try:
                embedded = normalize_object(embedded)
            except Exception:
                # Normalization failed, so fall back to referencing the
                # embedded object by its id only.
                if isinstance(embedded, dict):
                    embedded = embedded.get("id")

        payload = {
            "@context": fetch("@context"),
            "id": normalize_id(activity),
            "type": fetch("type"),
            "actor": id_for_object(fetch("actor")),
            "object": embedded,
            "to": normalize_to(fetch("to"), actor),
            "cc": list_from_value(fetch("cc")),
            "published": fetch("published"),
            "target": fetch("target"),
            "content": fetch("content"),
        }
        return Activity.model_validate(payload)
    except Exception as error:
        logger.exception(error)
        logger.info(activity)

        raise error

normalize_id(activity)

Creates a normalized id

>>> normalize_id({"id": "http://id.example"})
'http://id.example'

>>> normalize_id({})
Traceback (most recent call last):
    ...
ValueError: Cannot fake id if actor is not present
Source code in cattle_grid/activity_pub/muck_out/normalize.py
def normalize_id(activity):
    """
    Returns the id of the activity, inventing one if it is missing

    An existing ``id`` is returned unchanged. Otherwise a fake id is built
    from the activity's actor id, the marker ``#fake_id`` and a fresh
    ``uuid7()`` value.

    ```pycon
    >>> normalize_id({"id": "http://id.example"})
    'http://id.example'

    >>> normalize_id({})
    Traceback (most recent call last):
        ...
    ValueError: Cannot fake id if actor is not present

    ```

    :param activity: The activity to determine the id for
    :returns: The existing or the newly generated id
    :raises ValueError: if there is neither an id nor an actor
    """
    existing = activity.get("id")
    if existing is None:
        actor_id = id_for_object(activity.get("actor"))
        if actor_id is None:
            raise ValueError("Cannot fake id if actor is not present")
        return f"{actor_id}#fake_id{uuid7()!s}"

    return existing

normalize_object(obj)

Normalizes an object

Parameters:

Name Type Description Default
obj dict

The object to be normalized

required

Returns:

Type Description
Object
Source code in cattle_grid/activity_pub/muck_out/normalize.py
def normalize_object(obj: dict) -> Object:
    """Normalizes an object

    Collects the known keys of ``obj``, passing them through the matching
    helper where one applies (``id_for_object``, ``list_from_value``,
    ``sanitize_html``, ``normalize_attachments``, ``normalize_url``), and
    validates the result with ``Object.model_validate``.

    :param obj: The object to be normalized
    :returns: The validated object
    """
    fetch = obj.get

    # NOTE(review): the "obj" key does not match any field declared on the
    # Object model in types.py and is presumably ignored by validation;
    # likewise the model's `name` field is not forwarded here -- confirm
    # whether both are intended.
    normalized = {
        "@context": fetch("@context"),
        "id": fetch("id"),
        "type": fetch("type"),
        "attributedTo": id_for_object(fetch("attributedTo")),
        "obj": fetch("obj"),
        "to": list_from_value(fetch("to")),
        "cc": list_from_value(fetch("cc")),
        "published": fetch("published"),
        "target": fetch("target"),
        "content": sanitize_html(fetch("content")),
        "attachment": normalize_attachments(fetch("attachment")),
        "sensitive": fetch("sensitive"),
        "summary": sanitize_html(fetch("summary")),
        "tag": list_from_value(fetch("tag")),
        "url": normalize_url(fetch("url")),
        "inReplyTo": id_for_object(fetch("inReplyTo")),
    }
    return Object.model_validate(normalized)

normalize_to(value, actor)

Normalizes the to value

>>> normalize_to(None, "http://actor.example")
['http://actor.example']

>>> normalize_to("http://to.example", None)
['http://to.example']

>>> normalize_to(["http://alice.example", "http://bob.example"], None)
['http://alice.example', 'http://bob.example']
Source code in cattle_grid/activity_pub/muck_out/normalize.py
def normalize_to(value, actor):
    """Normalizes the to value

    A missing ``to`` (``None``) is replaced by a list containing only the
    receiving actor; every other value is handed to ``list_from_value``.

    ```pycon
    >>> normalize_to(None, "http://actor.example")
    ['http://actor.example']

    >>> normalize_to("http://to.example", None)
    ['http://to.example']

    >>> normalize_to(["http://alice.example", "http://bob.example"], None)
    ['http://alice.example', 'http://bob.example']

    ```

    :param value: The raw ``to`` value of the activity
    :param actor: Actor receiving the activity
    :returns: The list of recipients
    """
    return [actor] if value is None else list_from_value(value)

types

Activity

Bases: Common

This represents a first draft of a json-schema that every activity exchanged between servers MUST satisfy and that servers must be able to parse. Here ‘being able to parse’ means making it to the point where, depending on the type, you decide what side effects to perform.

Generally, the fields actor, to, and cc (and maybe bcc — not transported) represent how the message is being delivered. The fields actor, type, object, target, content represent how the message is processed by the server.

Parameters:

Name Type Description Default
field_context str | List[Any] | None
None
id str

id of the activity, can be assumed to be globally unique. Some activities such as a Follow request will require an id to be valid. Servers may assume an id to be required. As assigning an id is ‘trivial’, one should assign one.

required
to List[str]

Array of actors this activity is addressed to. It is sane to assume that an activity is addressed to at least one person.

required
cc List[str] | None

Array of actors this activity or object is carbon copied to.

None
published str | None

Moment of this activity or object being published

None
type str

Type of the activity. Side effects of this activity are determined by this type.

required
actor str

id of the actor performing this activity. One can assume that the activity is signed by this actor (in some form).

required
object str | Object | None
None
target str | Dict[str, Any] | None

The target, not sure if needed, included for completeness

None
content str | None

The content used for example to represent the Emote for a like

None
Source code in cattle_grid/activity_pub/muck_out/types.py
class Activity(Common):
    """
    This represents a first draft of a json-schema that every activities exchanged between servers MUST satisfy and be able to parse. Here 'being able to parse' means making it to the point, where depending on the type, you decide what side effects to perform.

    Generally, the fields actor, to, and cc (and maybe bcc --- not transported) represent how the message is being delivered. The fields actor, type, object, target, content represent how the message is processed by the server.
    """

    # Fields added on top of Common: actor, object, target and content.
    # actor is required (no default).
    actor: str = Field(
        ...,
        examples=["https://actor.example/"],
        description="""
    id of the actor performing this activity. One can assume that the activity is signed by this actor (in some form).
    """,
    )
    # object is optional and accepts either a plain string or an Object model.
    object: str | Object | None = Field(None)
    target: str | Dict[str, Any] | None = Field(
        None,
        examples=[
            "https://other.example/target_id",
            {"type": "Note", "content": "meow"},
        ],
        description="""
    The target, not sure if needed, included for completeness
    """,
    )
    content: str | None = Field(
        None,
        examples=["🐮", "❤️"],
        description="""
    The content used for example to represent the Emote for a like
    """,
    )
    # id, type and to are already declared on Common; they are re-declared
    # below with activity-specific descriptions and examples. The constraints
    # (required, min_length=1 for to) are the same as on Common.
    id: str = Field(
        ...,
        examples=["https://actor.example/some_id"],
        description="""
    id of the activity, can be assumed to be globally unique. Some activities such as a Follow request will require an id to be valid. Servers may assume an id to be required. As assigning an id is 'trivial', one should assign one.
    """,
    )
    type: str = Field(
        ...,
        examples=["Follow", "Accept", "Create", "Undo", "Like"],
        description="""
    Type of the activity. Side effects of this activity are determine by this type.
    """,
    )
    to: List[str] = Field(
        ...,
        examples=[
            ["https://bob.example"],
            ["https://alice.example", "https://bob.example"],
        ],
        min_length=1,
        description="""
    Array of actors this activity is addressed to. It is sane to assume that an activity is addressed to at least one person.
    """,
    )

Common

Bases: BaseModel

Parameters:

Name Type Description Default
field_context str | List[Any] | None
None
id str

id of the activity or object, can be assumed to be globally unique. Some activities such as a Follow request will require an id to be valid. Servers may assume an id to be required. As assigning an id is ‘trivial’, one should assign one.

required
to List[str]

Array of actors this activity or object is addressed to. It is sane to assume that an activity is addressed to at least one person.

required
cc List[str] | None

Array of actors this activity or object is carbon copied to.

None
published str | None

Moment of this activity or object being published

None
type str

Type of the activity or object. Side effects of this activity are determined by this type.

required
Source code in cattle_grid/activity_pub/muck_out/types.py
class Common(BaseModel):
    # Base model holding the fields shared by Activity and Object:
    # the JSON-LD context, id, addressing (to / cc), publication time and type.

    # Populated from the "@context" key of the input via the alias.
    field_context: str | List[Any] | None = Field(
        None,
        alias="@context",
        examples=[
            "https://www.w3.org/ns/activitystreams",
            ["https://www.w3.org/ns/activitystreams", {"Hashtag": "as:Hashtag"}],
        ],
    )
    id: str = Field(
        ...,
        examples=["https://actor.example/some_id"],
        description="""
    id of the activity or object, can be assumed to be globally unique. Some activities such as a Follow request will require an id to be valid. Servers may assume an id to be required. As assigning an id is 'trivial', one should assign one.
    """,
    )
    # Required and must contain at least one entry (min_length=1).
    to: List[str] = Field(
        ...,
        examples=[
            ["https://bob.example"],
            ["https://alice.example", "https://bob.example"],
        ],
        min_length=1,
        description="""
    Array of actors this activity or object is addressed to. It is sane to assume that an activity is addressed to at least one person.
    """,
    )
    cc: List[str] | None = Field(
        None,
        examples=[
            ["https://bob.example"],
            ["https://alice.example", "https://bob.example"],
        ],
        description="""
    Array of actors this activity or object is carbon copied to.
    """,
    )
    published: str | None = Field(
        None,
        description="""
    Moment of this activity or object being published
    """,
    )
    type: str = Field(
        ...,
        examples=["Follow", "Accept", "Create", "Undo", "Like", "Note"],
        description="""
    Type of the activity or object. Side effects of this activity are determined by this type.
    """,
    )

Object

Bases: Common

Parameters:

Name Type Description Default
field_context str | List[Any] | None
None
id str

id of the activity, can be assumed to be globally unique. Some activities such as a Follow request will require an id to be valid. Servers may assume an id to be required. As assigning an id is ‘trivial’, one should assign one.

required
to List[str]

Array of actors this activity is addressed to. It is sane to assume that an activity is addressed to at least one person.

required
cc List[str] | None

Array of actors this activity or object is carbon copied to.

None
published str | None

Moment of this activity or object being published

None
type str

Type of the activity. Side effects of this activity are determined by this type.

required
attributedTo str | None

id of the actor that authored this object

None
content str
required
summary str | None
None
name str | None
None
attachment List[Dict[str, Any]] | None
None
tag List[Dict[str, Any]] | None
None
url List[Union[Dict[str, Any], str]] | None
None
sensitive bool | None
None
inReplyTo str | None
None
Source code in cattle_grid/activity_pub/muck_out/types.py
class Object(Common):
    # Model for an object; extends Common with authorship (attributedTo),
    # textual fields (content, summary, name), attachments, tags, urls,
    # the sensitive flag and the reply reference.
    attributedTo: str | None = Field(
        None,
        examples=["https://actor.example/"],
        description="""
    id of the actor that authored this object
    """,
    )
    # content is the only field added here that is required (no default).
    content: str
    """
    The content of the object
    """
    summary: str | None = None
    """
    The summary of the object
    """
    name: str | None = None
    """
    The name of the object
    """
    attachment: List[Dict[str, Any]] | None = None
    """
    A list of objects that are attached to the original object
    """
    tag: List[Dict[str, Any]] | None = None
    """
    A list of objects that expand on the content of the object
    """
    url: List[Dict[str, Any] | str] | None = None
    """
    A list of urls that expand on the content of the object
    """
    sensitive: bool | None = None
    """
    Marks the object as sensitive. Currently, used by everyone, a better way would be an element of the tag list that labels the object as sensitive due a reason
    """
    inReplyTo: str | None = None
    """
    The object being replied to. Currently a string. Not sure if this is what I want.
    """
    # id, type and to are already declared on Common and are re-declared
    # below with the same constraints.
    # NOTE(review): the descriptions and the type examples below talk about
    # an "activity" (e.g. "Follow", "Accept"), although this model describes
    # an object -- looks copied from Activity; confirm whether Common's
    # "activity or object" wording should apply here instead.
    id: str = Field(
        ...,
        examples=["https://actor.example/some_id"],
        description="""
    id of the activity, can be assumed to be globally unique. Some activities such as a Follow request will require an id to be valid. Servers may assume an id to be required. As assigning an id is 'trivial', one should assign one.
    """,
    )
    type: str = Field(
        ...,
        examples=["Follow", "Accept", "Create", "Undo", "Like"],
        description="""
    Type of the activity. Side effects of this activity are determine by this type.
    """,
    )
    to: List[str] = Field(
        ...,
        examples=[
            ["https://bob.example"],
            ["https://alice.example", "https://bob.example"],
        ],
        min_length=1,
        description="""
    Array of actors this activity is addressed to. It is sane to assume that an activity is addressed to at least one person.
    """,
    )
attachment = None class-attribute instance-attribute

A list of objects that are attached to the original object

content instance-attribute

The content of the object

inReplyTo = None class-attribute instance-attribute

The object being replied to. Currently a string. Not sure if this is what I want.

name = None class-attribute instance-attribute

The name of the object

sensitive = None class-attribute instance-attribute

Marks the object as sensitive. Currently used by everyone; a better way would be an element of the tag list that labels the object as sensitive due to a reason

summary = None class-attribute instance-attribute

The summary of the object

tag = None class-attribute instance-attribute

A list of objects that expand on the content of the object

url = None class-attribute instance-attribute

A list of urls that expand on the content of the object

utils

allowed_html_tags = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'i', 'li', 'ol', 'strong', 'ul', 'p', 'br', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr'] module-attribute

The currently allowed list of html tags

list_from_value(value)

Transforms a value into a list

>>> list_from_value(["aaa"])
['aaa']

>>> list_from_value("aaa")
['aaa']

>>> list_from_value({"a": 1})
[{'a': 1}]

>>> list_from_value([])

>>> list_from_value(None)

Returns:

Type Description
List[Any] | None

A list or None in case of an empty list or None as argument

Source code in cattle_grid/activity_pub/muck_out/utils.py
def list_from_value(value: Any) -> List[Any] | None:
    """Transforms a list into a value

    ```pycon
    >>> list_from_value(["aaa"])
    ['aaa']

    >>> list_from_value("aaa")
    ['aaa']

    >>> list_from_value({"a": 1})
    [{'a': 1}]

    >>> list_from_value([])

    >>> list_from_value(None)

    ```


    :returns: A list or None in case of an empty list or None as argument

    """

    if isinstance(value, list):
        if len(value) == 0:
            return None
        return value
    if isinstance(value, str) or isinstance(value, dict):
        return [value]

    return None

sanitize_html(value)

Cleans html

>>> sanitize_html("<p>text</p>")
'<p>text</p>'

>>> sanitize_html("<script>alert('xss')</script>")
"&lt;script&gt;alert('xss')&lt;/script&gt;"
Source code in cattle_grid/activity_pub/muck_out/utils.py
def sanitize_html(value):
    """Cleans html

    Strings are passed to ``bleach.clean`` with ``allowed_html_tags`` as
    the permitted tags and ``strip=False``, so tags outside that list are
    escaped rather than removed. Any value that is not a string results
    in ``None``.

    ```pycon
    >>> sanitize_html("<p>text</p>")
    '<p>text</p>'

    >>> sanitize_html("<script>alert('xss')</script>")
    "&lt;script&gt;alert('xss')&lt;/script&gt;"

    ```

    :param value: The value to clean
    :returns: The cleaned html or None if value is not a string
    """
    if not isinstance(value, str):
        return None

    return bleach.clean(value, tags=allowed_html_tags, strip=False)