Document endpoints#

Danger

The /documents/ endpoints are deprecated and being replaced by /processing endpoints based on workflows. This document is meant as a maintenance reference for pre-existing client implementations.

For historical and compatibility reasons, besides using the /processing endpoints, you can also upload documents for processing by issuing an HTTP POST request to the /documents endpoint.

Uploading a document#

Info

Technical limitations similar to those of the /processing endpoint apply.

An upload request needs to be authenticated using an access_token, or preferably with an API key.

A document upload could look like this:

API key | Access Token

Python | Curl | Java

import requests

# Upload a document to the /documents endpoint, authenticating with an API key.
headers = {
    'accept': 'application/json',
    'Authorization': 'ApiKey <API_KEY_SECRET>',
    # Do not set 'Content-Type' here: requests builds the multipart/form-data
    # header itself, including the boundary the server needs to parse the body.
}
# Optional query parameters that affect the processing flow.
params = {}

# Open the file in a context manager so the handle is closed after the upload.
with open('invoice.jpg', 'rb') as document:
    files = {
        # (filename, file object, content type) of the "file" form field
        'file': ('invoice.jpg', document, 'image/jpeg'),
    }
    response = requests.post('https://api.natif.ai/documents/', headers=headers, params=params, files=files)

# Upload invoice.jpg as the multipart form field "file", authenticating with an API key.
curl -X POST "https://api.natif.ai/documents/" \
  -H "accept: application/json" \
  -H "Authorization: ApiKey <API_KEY_SECRET>" \
  -H "Content-Type: multipart/form-data" \
  -F "file=@invoice.jpg;type=image/jpeg"

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

class Main {

    /**
     * Uploads {@code invoice.jpg} to the /documents endpoint as the multipart form
     * field {@code file}, authenticating with an API key, and prints the response body.
     *
     * @param args unused
     * @throws IOException if the file cannot be read or the request fails
     */
    public static void main(String[] args) throws IOException {
        // Fully qualified names are used for the extra types so the snippet's import list stays unchanged.
        java.nio.file.Path file = java.nio.file.Paths.get("invoice.jpg");
        // Separator between multipart parts; it must not occur inside the file content.
        String boundary = "----DocumentUpload" + System.currentTimeMillis();

        URL url = new URL("https://api.natif.ai/documents/");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        httpConn.setRequestMethod("POST");
        // Required so that a request body can be written.
        httpConn.setDoOutput(true);

        httpConn.setRequestProperty("accept", "application/json");
        httpConn.setRequestProperty("Authorization", "ApiKey <API_KEY_SECRET>");
        httpConn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);

        try (java.io.OutputStream out = httpConn.getOutputStream()) {
            String partHeader = "--" + boundary + "\r\n"
                    + "Content-Disposition: form-data; name=\"file\"; filename=\"" + file.getFileName() + "\"\r\n"
                    + "Content-Type: image/jpeg\r\n\r\n";
            out.write(partHeader.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            java.nio.file.Files.copy(file, out);
            // Closing delimiter that terminates the multipart body.
            out.write(("\r\n--" + boundary + "--\r\n").getBytes(java.nio.charset.StandardCharsets.UTF_8));
        }

        // On non-2xx responses the body is delivered on the error stream.
        InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                ? httpConn.getInputStream()
                : httpConn.getErrorStream();
        if (responseStream == null) {
            // getErrorStream() returns null when the server sent no error body.
            System.out.println("");
            return;
        }
        // "\\A" matches the start of input, so the scanner yields the whole body as one token.
        try (Scanner s = new Scanner(responseStream, "UTF-8").useDelimiter("\\A")) {
            String response = s.hasNext() ? s.next() : "";
            System.out.println(response);
        }
    }
}

Python | Curl | Java

import requests

# Upload a document to the /documents endpoint, authenticating with an access token.
headers = {
    'accept': 'application/json',
    'Authorization': 'Bearer <access_token>',
    # Do not set 'Content-Type' here: requests builds the multipart/form-data
    # header itself, including the boundary the server needs to parse the body.
}
# Optional query parameters that affect the processing flow.
params = {}

# Open the file in a context manager so the handle is closed after the upload.
with open('invoice.jpg', 'rb') as document:
    files = {
        # (filename, file object, content type) of the "file" form field
        'file': ('invoice.jpg', document, 'image/jpeg'),
    }
    response = requests.post('https://api.natif.ai/documents/', headers=headers, params=params, files=files)

# Upload invoice.jpg as the multipart form field "file", authenticating with an access token.
curl -X POST "https://api.natif.ai/documents/" \
  -H "accept: application/json" \
  -H "Authorization: Bearer <access_token>" \
  -H "Content-Type: multipart/form-data" \
  -F "file=@invoice.jpg;type=image/jpeg"

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

class Main {

    /**
     * Uploads {@code invoice.jpg} to the /documents endpoint as the multipart form
     * field {@code file}, authenticating with an access token, and prints the response body.
     *
     * @param args unused
     * @throws IOException if the file cannot be read or the request fails
     */
    public static void main(String[] args) throws IOException {
        // Fully qualified names are used for the extra types so the snippet's import list stays unchanged.
        java.nio.file.Path file = java.nio.file.Paths.get("invoice.jpg");
        // Separator between multipart parts; it must not occur inside the file content.
        String boundary = "----DocumentUpload" + System.currentTimeMillis();

        URL url = new URL("https://api.natif.ai/documents/");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        httpConn.setRequestMethod("POST");
        // Required so that a request body can be written.
        httpConn.setDoOutput(true);

        httpConn.setRequestProperty("accept", "application/json");
        httpConn.setRequestProperty("Authorization", "Bearer <access_token>");
        httpConn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);

        try (java.io.OutputStream out = httpConn.getOutputStream()) {
            String partHeader = "--" + boundary + "\r\n"
                    + "Content-Disposition: form-data; name=\"file\"; filename=\"" + file.getFileName() + "\"\r\n"
                    + "Content-Type: image/jpeg\r\n\r\n";
            out.write(partHeader.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            java.nio.file.Files.copy(file, out);
            // Closing delimiter that terminates the multipart body.
            out.write(("\r\n--" + boundary + "--\r\n").getBytes(java.nio.charset.StandardCharsets.UTF_8));
        }

        // On non-2xx responses the body is delivered on the error stream.
        InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                ? httpConn.getInputStream()
                : httpConn.getErrorStream();
        if (responseStream == null) {
            // getErrorStream() returns null when the server sent no error body.
            System.out.println("");
            return;
        }
        // "\\A" matches the start of input, so the scanner yields the whole body as one token.
        try (Scanner s = new Scanner(responseStream, "UTF-8").useDelimiter("\\A")) {
            String response = s.hasNext() ? s.next() : "";
            System.out.println(response);
        }
    }
}

Depending on how you want to process the document, you can specify different parameters that affect the processing flow. See prebuilt APIs for specific parameters.

Once you have uploaded the document, you will receive a JSON object with meta information about the document that looks like this:

Document meta JSON

{
  "uuid": "095be615-a8ad-4c33-8e9c-c7612fbf6c9f",
  "processing_status": "success",
  "doc": "2019-08-24T14:15:22Z",
  "created_at": "2019-08-24T14:15:22Z",
  "filename_origin": "document.pdf",
  "page_num": 2,
  "num_pages": 2,
  "document_type": "invoice",
  "postprocessing_status": "reviewed",
  "language": "de",
  "process_instance_id": "6d6206fb-ab10-4b84-88e8-7ea81269bef0",
  "process_instance": {
    "start_time": "2019-08-24T14:15:22Z",
    "end_time": "2019-08-24T14:15:22Z",
    "process_definition_key": "string",
    "status": "active",
    "activity_instances": []
  },
  "retrieved": true
}

The meta information contains a unique identifier uuid that can be used to fetch further document related data.

Checking the processing status#

With the document's uuid you can get the current metadata about the document at any time. In the metadata you will find an entry processing_status. The processing_status allows you to determine whether the requested results (OCR, extractions, or PDFs) have already been created. The processing_status can take one of the following values: pending, failed, or success.

API key | Access Token

Python | Curl | Java

import requests

# Fetch the current metadata (including processing_status) of a document.
headers = {
    'accept': 'application/json',
    'Authorization': 'ApiKey <API_KEY_SECRET>',
}

# Replace <uuid> with the uuid returned by the upload request.
response = requests.get('https://api.natif.ai/documents/<uuid>', headers=headers)

# Fetch the current metadata (including processing_status) of a document.
# Replace <uuid> with the uuid returned by the upload request.
curl -X GET "https://api.natif.ai/documents/<uuid>" \
  -H "accept: application/json" \
  -H "Authorization: ApiKey <API_KEY_SECRET>"

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

class Main {

    /**
     * Fetches the current metadata of a document, including its processing_status,
     * authenticating with an API key, and prints the response body.
     *
     * <p>Replace {@code <uuid>} with the uuid returned by the upload request.
     *
     * @param args unused
     * @throws IOException if the request fails or the response cannot be read
     */
    public static void main(String[] args) throws IOException {
        URL url = new URL("https://api.natif.ai/documents/<uuid>");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        httpConn.setRequestMethod("GET");

        httpConn.setRequestProperty("accept", "application/json");
        httpConn.setRequestProperty("Authorization", "ApiKey <API_KEY_SECRET>");

        // On non-2xx responses the body is delivered on the error stream.
        InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                ? httpConn.getInputStream()
                : httpConn.getErrorStream();
        if (responseStream == null) {
            // getErrorStream() returns null when the server sent no error body.
            System.out.println("");
            return;
        }
        // "\\A" matches the start of input, so the scanner yields the whole body as one token.
        try (Scanner s = new Scanner(responseStream, "UTF-8").useDelimiter("\\A")) {
            String response = s.hasNext() ? s.next() : "";
            System.out.println(response);
        }
    }
}

Python | Curl | Java

import requests

# Fetch the current metadata (including processing_status) of a document.
headers = {
    'accept': 'application/json',
    'Authorization': 'Bearer <access_token>',
}

# Replace <uuid> with the uuid returned by the upload request.
response = requests.get('https://api.natif.ai/documents/<uuid>', headers=headers)

# Fetch the current metadata (including processing_status) of a document.
# Replace <uuid> with the uuid returned by the upload request.
curl -X GET "https://api.natif.ai/documents/<uuid>" \
  -H "accept: application/json" \
  -H "Authorization: Bearer <access_token>"

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

class Main {

    /**
     * Fetches the current metadata of a document, including its processing_status,
     * authenticating with an access token, and prints the response body.
     *
     * <p>Replace {@code <uuid>} with the uuid returned by the upload request.
     *
     * @param args unused
     * @throws IOException if the request fails or the response cannot be read
     */
    public static void main(String[] args) throws IOException {
        URL url = new URL("https://api.natif.ai/documents/<uuid>");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        httpConn.setRequestMethod("GET");

        httpConn.setRequestProperty("accept", "application/json");
        httpConn.setRequestProperty("Authorization", "Bearer <access_token>");

        // On non-2xx responses the body is delivered on the error stream.
        InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                ? httpConn.getInputStream()
                : httpConn.getErrorStream();
        if (responseStream == null) {
            // getErrorStream() returns null when the server sent no error body.
            System.out.println("");
            return;
        }
        // "\\A" matches the start of input, so the scanner yields the whole body as one token.
        try (Scanner s = new Scanner(responseStream, "UTF-8").useDelimiter("\\A")) {
            String response = s.hasNext() ? s.next() : "";
            System.out.println(response);
        }
    }
}

Fetching results#

If the processing_status is success, you can fetch different information about the document, depending on how you uploaded it:

You can directly fetch the OCR results in different formats
Depending on the inferred or chosen document type you can fetch the corresponding document extractions.
You can even fetch the PDF document enriched by different information as an invisible OCR layer.
During processing, corrections such as orientation and skew adjustments are performed. You can download the page images as JPGs.

OCR results#

API key | Access Token

Python | Curl | Java

import requests

# Fetch the OCR results of a successfully processed document.
headers = {
    'accept': 'application/json',
    'Authorization': 'ApiKey <API_KEY_SECRET>',
}

# Replace <uuid> with the uuid returned by the upload request.
response = requests.get('https://api.natif.ai/documents/<uuid>/ocr', headers=headers)

# Fetch the OCR results of a successfully processed document.
# Replace <uuid> with the uuid returned by the upload request.
curl -X GET "https://api.natif.ai/documents/<uuid>/ocr" \
  -H "accept: application/json" \
  -H "Authorization: ApiKey <API_KEY_SECRET>"

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

class Main {

    /**
     * Fetches the OCR results of a document, authenticating with an API key,
     * and prints the response body.
     *
     * <p>Replace {@code <uuid>} with the uuid returned by the upload request.
     *
     * @param args unused
     * @throws IOException if the request fails or the response cannot be read
     */
    public static void main(String[] args) throws IOException {
        URL url = new URL("https://api.natif.ai/documents/<uuid>/ocr");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        httpConn.setRequestMethod("GET");

        httpConn.setRequestProperty("accept", "application/json");
        httpConn.setRequestProperty("Authorization", "ApiKey <API_KEY_SECRET>");

        // On non-2xx responses the body is delivered on the error stream.
        InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                ? httpConn.getInputStream()
                : httpConn.getErrorStream();
        if (responseStream == null) {
            // getErrorStream() returns null when the server sent no error body.
            System.out.println("");
            return;
        }
        // "\\A" matches the start of input, so the scanner yields the whole body as one token.
        try (Scanner s = new Scanner(responseStream, "UTF-8").useDelimiter("\\A")) {
            String response = s.hasNext() ? s.next() : "";
            System.out.println(response);
        }
    }
}

Python | Curl | Java

import requests

# Fetch the OCR results of a successfully processed document.
headers = {
    'accept': 'application/json',
    'Authorization': 'Bearer <access_token>',
}

# Replace <uuid> with the uuid returned by the upload request.
response = requests.get('https://api.natif.ai/documents/<uuid>/ocr', headers=headers)

# Fetch the OCR results of a successfully processed document.
# Replace <uuid> with the uuid returned by the upload request.
curl -X GET "https://api.natif.ai/documents/<uuid>/ocr" \
  -H "accept: application/json" \
  -H "Authorization: Bearer <access_token>"

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

class Main {

    /**
     * Fetches the OCR results of a document, authenticating with an access token,
     * and prints the response body.
     *
     * <p>Replace {@code <uuid>} with the uuid returned by the upload request.
     *
     * @param args unused
     * @throws IOException if the request fails or the response cannot be read
     */
    public static void main(String[] args) throws IOException {
        URL url = new URL("https://api.natif.ai/documents/<uuid>/ocr");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        httpConn.setRequestMethod("GET");

        httpConn.setRequestProperty("accept", "application/json");
        httpConn.setRequestProperty("Authorization", "Bearer <access_token>");

        // On non-2xx responses the body is delivered on the error stream.
        InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                ? httpConn.getInputStream()
                : httpConn.getErrorStream();
        if (responseStream == null) {
            // getErrorStream() returns null when the server sent no error body.
            System.out.println("");
            return;
        }
        // "\\A" matches the start of input, so the scanner yields the whole body as one token.
        try (Scanner s = new Scanner(responseStream, "UTF-8").useDelimiter("\\A")) {
            String response = s.hasNext() ? s.next() : "";
            System.out.println(response);
        }
    }
}

Extraction results#

API key | Access Token

Python | Curl | Java

import requests

# Fetch the extraction results of a successfully processed document.
headers = {
    'accept': 'application/json',
    'Authorization': 'ApiKey <API_KEY_SECRET>',
}

# Replace <uuid> with the uuid returned by the upload request.
response = requests.get('https://api.natif.ai/documents/<uuid>/extractions', headers=headers)

# Fetch the extraction results of a successfully processed document.
# Replace <uuid> with the uuid returned by the upload request.
curl -X GET "https://api.natif.ai/documents/<uuid>/extractions" \
  -H "accept: application/json" \
  -H "Authorization: ApiKey <API_KEY_SECRET>"

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

class Main {

    /**
     * Fetches the extraction results of a document, authenticating with an API key,
     * and prints the response body.
     *
     * <p>Replace {@code <uuid>} with the uuid returned by the upload request.
     *
     * @param args unused
     * @throws IOException if the request fails or the response cannot be read
     */
    public static void main(String[] args) throws IOException {
        URL url = new URL("https://api.natif.ai/documents/<uuid>/extractions");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        httpConn.setRequestMethod("GET");

        httpConn.setRequestProperty("accept", "application/json");
        httpConn.setRequestProperty("Authorization", "ApiKey <API_KEY_SECRET>");

        // On non-2xx responses the body is delivered on the error stream.
        InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                ? httpConn.getInputStream()
                : httpConn.getErrorStream();
        if (responseStream == null) {
            // getErrorStream() returns null when the server sent no error body.
            System.out.println("");
            return;
        }
        // "\\A" matches the start of input, so the scanner yields the whole body as one token.
        try (Scanner s = new Scanner(responseStream, "UTF-8").useDelimiter("\\A")) {
            String response = s.hasNext() ? s.next() : "";
            System.out.println(response);
        }
    }
}

Python | Curl | Java

import requests

# Fetch the extraction results of a successfully processed document.
headers = {
    'accept': 'application/json',
    'Authorization': 'Bearer <access_token>',
}

# Replace <uuid> with the uuid returned by the upload request.
response = requests.get('https://api.natif.ai/documents/<uuid>/extractions', headers=headers)

# Fetch the extraction results of a successfully processed document.
# Replace <uuid> with the uuid returned by the upload request.
curl -X GET "https://api.natif.ai/documents/<uuid>/extractions" \
  -H "accept: application/json" \
  -H "Authorization: Bearer <access_token>"

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

class Main {

    /**
     * Fetches the extraction results of a document, authenticating with an access token,
     * and prints the response body.
     *
     * <p>Replace {@code <uuid>} with the uuid returned by the upload request.
     *
     * @param args unused
     * @throws IOException if the request fails or the response cannot be read
     */
    public static void main(String[] args) throws IOException {
        URL url = new URL("https://api.natif.ai/documents/<uuid>/extractions");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        httpConn.setRequestMethod("GET");

        httpConn.setRequestProperty("accept", "application/json");
        httpConn.setRequestProperty("Authorization", "Bearer <access_token>");

        // On non-2xx responses the body is delivered on the error stream.
        InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                ? httpConn.getInputStream()
                : httpConn.getErrorStream();
        if (responseStream == null) {
            // getErrorStream() returns null when the server sent no error body.
            System.out.println("");
            return;
        }
        // "\\A" matches the start of input, so the scanner yields the whole body as one token.
        try (Scanner s = new Scanner(responseStream, "UTF-8").useDelimiter("\\A")) {
            String response = s.hasNext() ? s.next() : "";
            System.out.println(response);
        }
    }
}

PDFs#

API key | Access Token

Python | Curl | Java

import requests

# Fetch the PDF of a successfully processed document.
headers = {
    'accept': 'application/json',
    'Authorization': 'ApiKey <API_KEY_SECRET>',
}

# Replace <uuid> with the uuid returned by the upload request.
# NOTE(review): response.content presumably holds the PDF bytes - confirm against the API reference.
response = requests.get('https://api.natif.ai/documents/<uuid>/pdf', headers=headers)

# Fetch the PDF of a successfully processed document.
# Replace <uuid> with the uuid returned by the upload request.
# NOTE(review): the body is presumably the PDF itself; add --output <file> to save it - confirm.
curl -X GET "https://api.natif.ai/documents/<uuid>/pdf" \
  -H "accept: application/json" \
  -H "Authorization: ApiKey <API_KEY_SECRET>"

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

class Main {

    /**
     * Requests the PDF of a document, authenticating with an API key,
     * and prints the response body.
     *
     * <p>Replace {@code <uuid>} with the uuid returned by the upload request.
     *
     * @param args unused
     * @throws IOException if the request fails or the response cannot be read
     */
    public static void main(String[] args) throws IOException {
        URL url = new URL("https://api.natif.ai/documents/<uuid>/pdf");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        httpConn.setRequestMethod("GET");

        httpConn.setRequestProperty("accept", "application/json");
        httpConn.setRequestProperty("Authorization", "ApiKey <API_KEY_SECRET>");

        // On non-2xx responses the body is delivered on the error stream.
        InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                ? httpConn.getInputStream()
                : httpConn.getErrorStream();
        if (responseStream == null) {
            // getErrorStream() returns null when the server sent no error body.
            System.out.println("");
            return;
        }
        // NOTE(review): a successful body is presumably binary PDF data; write the stream
        // to a file instead of printing it as text - confirm against the API reference.
        // "\\A" matches the start of input, so the scanner yields the whole body as one token.
        try (Scanner s = new Scanner(responseStream, "UTF-8").useDelimiter("\\A")) {
            String response = s.hasNext() ? s.next() : "";
            System.out.println(response);
        }
    }
}

Python | Curl | Java

import requests

# Fetch the PDF of a successfully processed document.
headers = {
    'accept': 'application/json',
    'Authorization': 'Bearer <access_token>',
}

# Replace <uuid> with the uuid returned by the upload request.
# NOTE(review): response.content presumably holds the PDF bytes - confirm against the API reference.
response = requests.get('https://api.natif.ai/documents/<uuid>/pdf', headers=headers)

# Fetch the PDF of a successfully processed document.
# Replace <uuid> with the uuid returned by the upload request.
# NOTE(review): the body is presumably the PDF itself; add --output <file> to save it - confirm.
curl -X GET "https://api.natif.ai/documents/<uuid>/pdf" \
  -H "accept: application/json" \
  -H "Authorization: Bearer <access_token>"

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

class Main {

    /**
     * Requests the PDF of a document, authenticating with an access token,
     * and prints the response body.
     *
     * <p>Replace {@code <uuid>} with the uuid returned by the upload request.
     *
     * @param args unused
     * @throws IOException if the request fails or the response cannot be read
     */
    public static void main(String[] args) throws IOException {
        URL url = new URL("https://api.natif.ai/documents/<uuid>/pdf");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        httpConn.setRequestMethod("GET");

        httpConn.setRequestProperty("accept", "application/json");
        httpConn.setRequestProperty("Authorization", "Bearer <access_token>");

        // On non-2xx responses the body is delivered on the error stream.
        InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                ? httpConn.getInputStream()
                : httpConn.getErrorStream();
        if (responseStream == null) {
            // getErrorStream() returns null when the server sent no error body.
            System.out.println("");
            return;
        }
        // NOTE(review): a successful body is presumably binary PDF data; write the stream
        // to a file instead of printing it as text - confirm against the API reference.
        // "\\A" matches the start of input, so the scanner yields the whole body as one token.
        try (Scanner s = new Scanner(responseStream, "UTF-8").useDelimiter("\\A")) {
            String response = s.hasNext() ? s.next() : "";
            System.out.println(response);
        }
    }
}