Edit page in Livemark
(2022-09-19 18:33)

Table Steps

These steps are meant to be used at the table level of a resource. They cover a range of operations, from simple validation or writing to disk to complex re-shaping such as pivoting or melting.

Aggregate Table

Group rows by the field given as group_name, then apply the aggregation functions provided in the aggregation dictionary (see the example below).

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform-groups.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        steps.table_aggregate(
            group_name="name", aggregation={"sum": ("population", sum)}
        ),
    ],
)
print(target.schema)
print(target.to_view())
{'fields': [{'name': 'name', 'type': 'string'}, {'name': 'sum', 'type': 'any'}]}
+-----------+-----+
| name      | sum |
+===========+=====+
| 'france'  | 120 |
+-----------+-----+
| 'germany' | 160 |
+-----------+-----+
| 'spain'   |  80 |
+-----------+-----+

Reference

steps.table_aggregate (class)

steps.table_aggregate (class)

Aggregate table

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, aggregation: dict, group_name: str) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])
  • aggregation (dict)
  • group_name (str)

steps.table_aggregate.aggregation (property)

NOTE: add docs

Signature

dict

steps.table_aggregate.group_name (property)

NOTE: add docs

Signature

str

Attach Table

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
      steps.table_attach(resource=Resource(data=[["note"], ["large"], ["mid"]])),
    ],
)
print(target.schema)
print(target.to_view())
{'fields': [{'name': 'id', 'type': 'integer'},
            {'name': 'name', 'type': 'string'},
            {'name': 'population', 'type': 'integer'},
            {'name': 'note', 'type': 'string'}]}
+----+-----------+------------+---------+
| id | name      | population | note    |
+====+===========+============+=========+
|  1 | 'germany' |         83 | 'large' |
+----+-----------+------------+---------+
|  2 | 'france'  |         66 | 'mid'   |
+----+-----------+------------+---------+
|  3 | 'spain'   |         47 | None    |
+----+-----------+------------+---------+

Reference

steps.table_attach (class)

steps.table_attach (class)

Attach table

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, resource: Union[Resource, str]) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])
  • resource (Union[Resource, str])

steps.table_attach.resource (property)

NOTE: add docs

Signature

Union[Resource, str]

Debug Table

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
      steps.table_debug(function=print),
    ],
)
print(target.to_view())
{'id': 1, 'name': 'germany', 'population': 83}
{'id': 2, 'name': 'france', 'population': 66}
{'id': 3, 'name': 'spain', 'population': 47}
+----+-----------+------------+
| id | name      | population |
+====+===========+============+
|  1 | 'germany' |         83 |
+----+-----------+------------+
|  2 | 'france'  |         66 |
+----+-----------+------------+
|  3 | 'spain'   |         47 |
+----+-----------+------------+

Reference

steps.table_debug (class)

steps.table_debug (class)

Debug table

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, function: Any) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])
  • function (Any)

steps.table_debug.function (property)

NOTE: add docs

Signature

Any

Diff Tables

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        steps.table_diff(
            resource=Resource(
                data=[
                    ["id", "name", "population"],
                    [1, "germany", 83],
                    [2, "france", 50],
                    [3, "spain", 47],
                ]
            )
        ),
    ]
)
print(target.schema)
print(target.to_view())
{'fields': [{'name': 'id', 'type': 'integer'},
            {'name': 'name', 'type': 'string'},
            {'name': 'population', 'type': 'integer'}]}
+----+----------+------------+
| id | name     | population |
+====+==========+============+
|  2 | 'france' |         66 |
+----+----------+------------+

Reference

steps.table_diff (class)

steps.table_diff (class)

Diff tables

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, resource: Union[Resource, str], ignore_order: bool = False, use_hash: bool = False) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])
  • resource (Union[Resource, str])
  • ignore_order (bool)
  • use_hash (bool)

steps.table_diff.resource (property)

NOTE: add docs

Signature

Union[Resource, str]

steps.table_diff.ignore_order (property)

NOTE: add docs

Signature

bool

steps.table_diff.use_hash (property)

NOTE: add docs

Signature

bool

Intersect Tables

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        steps.table_intersect(
            resource=Resource(
                data=[
                    ["id", "name", "population"],
                    [1, "germany", 83],
                    [2, "france", 50],
                    [3, "spain", 47],
                ]
            ),
        ),
    ]
)
print(target.schema)
print(target.to_view())
{'fields': [{'name': 'id', 'type': 'integer'},
            {'name': 'name', 'type': 'string'},
            {'name': 'population', 'type': 'integer'}]}
+----+-----------+------------+
| id | name      | population |
+====+===========+============+
|  1 | 'germany' |         83 |
+----+-----------+------------+
|  3 | 'spain'   |         47 |
+----+-----------+------------+

Reference

steps.table_intersect (class)

steps.table_intersect (class)

Intersect tables

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, resource: Union[Resource, str], use_hash: bool = False) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])
  • resource (Union[Resource, str])
  • use_hash (bool)

steps.table_intersect.resource (property)

NOTE: add docs

Signature

Union[Resource, str]

steps.table_intersect.use_hash (property)

NOTE: add docs

Signature

bool

Join Tables

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        steps.table_join(
            resource=Resource(data=[["id", "note"], [1, "beer"], [2, "vine"]]),
            field_name="id",
        ),
    ]
)
print(target.schema)
print(target.to_view())
{'fields': [{'name': 'id', 'type': 'integer'},
            {'name': 'name', 'type': 'string'},
            {'name': 'population', 'type': 'integer'},
            {'name': 'note', 'type': 'string'}]}
+----+-----------+------------+--------+
| id | name      | population | note   |
+====+===========+============+========+
|  1 | 'germany' |         83 | 'beer' |
+----+-----------+------------+--------+
|  2 | 'france'  |         66 | 'vine' |
+----+-----------+------------+--------+

Reference

steps.table_join (class)

steps.table_join (class)

Join tables

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, resource: Union[Resource, str], field_name: Optional[str] = None, use_hash: bool = False, mode: str = 'inner') -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])
  • resource (Union[Resource, str])
  • field_name (Optional[str])
  • use_hash (bool)
  • mode (str)

steps.table_join.resource (property)

NOTE: add docs

Signature

Union[Resource, str]

steps.table_join.field_name (property)

NOTE: add docs

Signature

Optional[str]

steps.table_join.use_hash (property)

NOTE: add docs

Signature

bool

steps.table_join.mode (property)

NOTE: add docs

Signature

str

Melt Table

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        steps.table_melt(field_name="name"),
    ]
)
print(target.schema)
print(target.to_view())
{'fields': [{'name': 'name', 'type': 'string'},
            {'name': 'variable', 'type': 'string'},
            {'name': 'value', 'type': 'any'}]}
+-----------+--------------+-------+
| name      | variable     | value |
+===========+==============+=======+
| 'germany' | 'id'         |     1 |
+-----------+--------------+-------+
| 'germany' | 'population' |    83 |
+-----------+--------------+-------+
| 'france'  | 'id'         |     2 |
+-----------+--------------+-------+
| 'france'  | 'population' |    66 |
+-----------+--------------+-------+
| 'spain'   | 'id'         |     3 |
+-----------+--------------+-------+
...

Reference

steps.table_melt (class)

steps.table_melt (class)

Melt tables

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, field_name: str, variables: Optional[str] = None, to_field_names: List[str] = NOTHING) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])
  • field_name (str)
  • variables (Optional[str])
  • to_field_names (List[str])

steps.table_melt.field_name (property)

NOTE: add docs

Signature

str

steps.table_melt.variables (property)

NOTE: add docs

Signature

Optional[str]

steps.table_melt.to_field_names (property)

NOTE: add docs

Signature

List[str]

Merge Tables

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_merge(
            resource=Resource(data=[["id", "name", "note"], [4, "malta", "island"]])
        ),
    ]
)
print(target.schema)
print(target.to_view())

Reference

steps.table_merge (class)

steps.table_merge (class)

Merge tables

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, resource: Union[Resource, str], field_names: List[str] = NOTHING, sort_by_field: Optional[str] = None, ignore_fields: bool = False) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])
  • resource (Union[Resource, str])
  • field_names (List[str])
  • sort_by_field (Optional[str])
  • ignore_fields (bool)

steps.table_merge.resource (property)

NOTE: add docs

Signature

Union[Resource, str]

steps.table_merge.field_names (property)

NOTE: add docs

Signature

List[str]

steps.table_merge.sort_by_field (property)

NOTE: add docs

Signature

Optional[str]

steps.table_merge.ignore_fields (property)

NOTE: add docs

Signature

bool

Normalize Table

The table_normalize step normalizes the underlying tabular stream (casting types and fixing dimensions) according to a provided or inferred schema. Unless your data is very large, it's recommended to normalize a table before applying any other steps.

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource("table.csv")
print(source.read_cells())
target = transform(
    source,
    steps=[
        steps.table_normalize(),
    ]
)
print(target.read_cells())
[['id', 'name'], ['1', 'english'], ['2', '中国人']]
[['id', 'name'], [1, 'english'], [2, '中国人']]

Reference

steps.table_normalize (class)

steps.table_normalize (class)

Normalize table

Signature

(*, title: Optional[str] = None, description: Optional[str] = None) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])

steps.table_normalize.type (property)

Signature

ClassVar[str]

steps.table_normalize.title (property)

Signature

Optional[str]

steps.table_normalize.description (property)

Signature

Optional[str]

Pivot Table

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform-pivot.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        steps.table_pivot(f1="region", f2="gender", f3="units", aggfun=sum),
    ]
)
print(target.schema)
print(target.to_view())

Reference

steps.table_pivot (class)

steps.table_pivot (class)

Pivot table

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, f1: str, f2: str, f3: str, aggfun: Any) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])
  • f1 (str)
  • f2 (str)
  • f3 (str)
  • aggfun (Any)

steps.table_pivot.f1 (property)

NOTE: add docs

Signature

str

steps.table_pivot.f2 (property)

NOTE: add docs

Signature

str

steps.table_pivot.f3 (property)

NOTE: add docs

Signature

str

steps.table_pivot.aggfun (property)

NOTE: add docs

Signature

Any

Print Table

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        steps.table_print(),
    ]
)
==  =======  ==========
id  name     population
==  =======  ==========
 1  germany          83
 2  france           66
 3  spain            47
==  =======  ==========

Reference

steps.table_print (class)

steps.table_print (class)

Print table

Signature

(*, title: Optional[str] = None, description: Optional[str] = None) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])

steps.table_print.type (property)

Signature

ClassVar[str]

steps.table_print.title (property)

Signature

Optional[str]

steps.table_print.description (property)

Signature

Optional[str]

Recast Table

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        steps.table_melt(field_name="id"),
        steps.table_recast(field_name="id"),
    ]
)
print(target.schema)
print(target.to_view())
{'fields': [{'name': 'id', 'type': 'integer'},
            {'name': 'name', 'type': 'string'},
            {'name': 'population', 'type': 'integer'}]}
+----+-----------+------------+
| id | name      | population |
+====+===========+============+
|  1 | 'germany' |         83 |
+----+-----------+------------+
|  2 | 'france'  |         66 |
+----+-----------+------------+
|  3 | 'spain'   |         47 |
+----+-----------+------------+

Reference

steps.table_recast (class)

steps.table_recast (class)

Recast table

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, field_name: str, from_field_names: List[str] = NOTHING) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])
  • field_name (str)
  • from_field_names (List[str])

steps.table_recast.field_name (property)

NOTE: add docs

Signature

str

steps.table_recast.from_field_names (property)

NOTE: add docs

Signature

List[str]

Transpose Table

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        steps.table_transpose(),
    ]
)
print(target.schema)
print(target.to_view())
{'fields': [{'name': 'id', 'type': 'string'},
            {'name': '1', 'type': 'any'},
            {'name': '2', 'type': 'any'},
            {'name': '3', 'type': 'any'}]}
+--------------+-----------+----------+---------+
| id           | 1         | 2        | 3       |
+==============+===========+==========+=========+
| 'name'       | 'germany' | 'france' | 'spain' |
+--------------+-----------+----------+---------+
| 'population' |        83 |       66 |      47 |
+--------------+-----------+----------+---------+

Reference

steps.table_transpose (class)

steps.table_transpose (class)

Transpose table

Signature

(*, title: Optional[str] = None, description: Optional[str] = None) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])

steps.table_transpose.type (property)

Signature

ClassVar[str]

steps.table_transpose.title (property)

Signature

Optional[str]

steps.table_transpose.description (property)

Signature

Optional[str]

Validate Table

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.cell_set(field_name="population", value="bad"),
        steps.table_validate(),
    ]
)
pprint(target.schema)
try:
  pprint(target.to_view())
except Exception as exception:
  pprint(exception)
{'fields': [{'name': 'id', 'type': 'integer'},
            {'name': 'name', 'type': 'string'},
            {'name': 'population', 'type': 'integer'}]}
FrictionlessException('[step-error] Step is not valid: "table_validate" raises "[type-error] Type error in the cell "bad" in row "2" and field "population" at position "3": type is "integer/default" " ')

Reference

steps.table_validate (class)

steps.table_validate (class)

Validate table

Signature

(*, title: Optional[str] = None, description: Optional[str] = None) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])

steps.table_validate.type (property)

Signature

ClassVar[str]

steps.table_validate.title (property)

Signature

Optional[str]

steps.table_validate.description (property)

Signature

Optional[str]

Write Table

Example

from pprint import pprint
from frictionless import Package, Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_write(path='transform.json'),
    ]
)

Let's read the output:

cat transform.json
[
  [
    "id",
    "name",
    "population"
  ],
  [
    1,
    "germany",
    83
  ],
  [
    2,
    "france",
    66
  ],
  [
    3,
    "spain",
    47
  ]
]
with open('transform.json') as file:
    print(file.read())
[
  [
    "id",
    "name",
    "population"
  ],
  [
    1,
    "germany",
    83
  ],
  [
    2,
    "france",
    66
  ],
  [
    3,
    "spain",
    47
  ]
]

Reference

steps.table_write (class)

steps.table_write (class)

Write table

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, path: str) -> None

Parameters
  • title (Optional[str])
  • description (Optional[str])
  • path (str)

steps.table_write.path (property)

NOTE: add docs

Signature

str

This is the documentation for a beta version of Frictionless Framework (v5). Read the Frictionless Framework (v4) docs for the version that pip currently installs by default.