Document endpoints#
Danger
The /documents/
endpoints are deprecated and being replaced by /processing/
endpoints based on workflows.
This document is meant as a maintenance reference for pre-existing client implementations.
For historical and compatibility reasons, in addition to the /processing endpoints, you can
also upload documents for processing by issuing an HTTP POST request to the /documents
endpoint.
Uploading a document#
Info
Similar technical limitations as for the /processing
endpoint apply.
An upload request needs to be authenticated using an access_token
, or
preferably with an API key.
A document upload could look like this:
import requests

# Upload a document to the deprecated /documents/ endpoint, authenticating
# with an API key.
#
# Content-Type is intentionally NOT set by hand: when `files` is passed,
# requests builds the multipart/form-data header itself, including the
# boundary that a hand-written header would be missing.
headers = {
    'accept': 'application/json',
    'Authorization': 'ApiKey <API_KEY_SECRET>',
}
# Placeholder: replace with the query parameters for your processing flow.
params = {
    ...
}
# The file part is (filename, file object, MIME type); the context manager
# closes the file handle once the request has been sent.
with open('document.pdf', 'rb') as document:
    files = {
        'file': ('document.pdf', document, 'application/pdf'),
    }
    response = requests.post('https://api.natif.ai/documents/', headers=headers, params=params, files=files)
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
/**
 * Sample client: issues a POST request to the deprecated /documents/ endpoint,
 * authenticated with an API key, and prints the response body.
 */
class Main {
    /**
     * Sends the request and prints the response (or error) body to stdout.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        URL url = new URL("https://api.natif.ai/documents/");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        try {
            httpConn.setRequestMethod("POST");
            httpConn.setRequestProperty("accept", "application/json");
            httpConn.setRequestProperty("Authorization", "ApiKey <API_KEY_SECRET>");
            // NOTE(review): no request body is written here. A real upload must
            // enable output, stream the multipart file part, and include the
            // boundary in this header.
            httpConn.setRequestProperty("Content-Type", "multipart/form-data");

            InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                    ? httpConn.getInputStream()
                    : httpConn.getErrorStream();
            // getErrorStream() returns null when the server sent no error body.
            if (responseStream == null) {
                System.out.println("");
                return;
            }
            // Closing the scanner also closes the underlying response stream.
            try (Scanner s = new Scanner(responseStream, "UTF-8")) {
                // "\\A" matches start of input, so the whole body is one token.
                s.useDelimiter("\\A");
                String response = s.hasNext() ? s.next() : "";
                System.out.println(response);
            }
        } finally {
            httpConn.disconnect();
        }
    }
}
import requests

# Upload a document to the deprecated /documents/ endpoint, authenticating
# with a bearer access token.
#
# Content-Type is intentionally NOT set by hand: when `files` is passed,
# requests builds the multipart/form-data header itself, including the
# boundary that a hand-written header would be missing.
headers = {
    'accept': 'application/json',
    'Authorization': 'Bearer <access_token>',
}
# Placeholder: replace with the query parameters for your processing flow.
params = {
    ...
}
# The file part is (filename, file object, MIME type); the context manager
# closes the file handle once the request has been sent.
with open('document.pdf', 'rb') as document:
    files = {
        'file': ('document.pdf', document, 'application/pdf'),
    }
    response = requests.post('https://api.natif.ai/documents/', headers=headers, params=params, files=files)
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
/**
 * Sample client: issues a POST request to the deprecated /documents/ endpoint,
 * authenticated with a bearer access token, and prints the response body.
 */
class Main {
    /**
     * Sends the request and prints the response (or error) body to stdout.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        URL url = new URL("https://api.natif.ai/documents/");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        try {
            httpConn.setRequestMethod("POST");
            httpConn.setRequestProperty("accept", "application/json");
            httpConn.setRequestProperty("Authorization", "Bearer <access_token>");
            // NOTE(review): no request body is written here. A real upload must
            // enable output, stream the multipart file part, and include the
            // boundary in this header.
            httpConn.setRequestProperty("Content-Type", "multipart/form-data");

            InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                    ? httpConn.getInputStream()
                    : httpConn.getErrorStream();
            // getErrorStream() returns null when the server sent no error body.
            if (responseStream == null) {
                System.out.println("");
                return;
            }
            // Closing the scanner also closes the underlying response stream.
            try (Scanner s = new Scanner(responseStream, "UTF-8")) {
                // "\\A" matches start of input, so the whole body is one token.
                s.useDelimiter("\\A");
                String response = s.hasNext() ? s.next() : "";
                System.out.println(response);
            }
        } finally {
            httpConn.disconnect();
        }
    }
}
Depending on how you want to process the document, you can specify different parameters that affect the processing flow. See prebuilt APIs for specific parameters.
Once you have uploaded the document, you will get a JSON object with meta information about the document, which looks like this:
Document meta JSON
{
"uuid": "095be615-a8ad-4c33-8e9c-c7612fbf6c9f",
"processing_status": "success",
"doc": "2019-08-24T14:15:22Z",
"created_at": "2019-08-24T14:15:22Z",
"filename_origin": "document.pdf",
"page_num": 2,
"num_pages": 2,
"document_type": "invoice",
"postprocessing_status": "reviewed",
"language": "de",
"process_instance_id": "6d6206fb-ab10-4b84-88e8-7ea81269bef0",
"process_instance": {
"start_time": "2019-08-24T14:15:22Z",
"end_time": "2019-08-24T14:15:22Z",
"process_definition_key": "string",
"status": "active",
"activity_instances": []
},
"retrieved": true
}
The meta information contains a unique identifier uuid
that can be used to fetch further document related data.
Checking the processing status#
With the document's uuid
you can get the current metadata about the document at any time.
In the metadata you will find an entry processing_status
. The processing_status
allows you to determine whether the
requested results (OCR, extractions, or PDFs) have already been created.
The processing_status can take one of the following values: pending, failed, or success.
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
/**
 * Sample client: issues a POST request to the /documents/ endpoint,
 * authenticated with an API key, and prints the response body.
 */
class Main {
    /**
     * Sends the request and prints the response (or error) body to stdout.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): URL, method and Content-Type are the same as in the
        // upload request; reading a document's status presumably needs a GET
        // on a uuid-specific URL -- confirm against the API reference.
        URL url = new URL("https://api.natif.ai/documents/");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        try {
            httpConn.setRequestMethod("POST");
            httpConn.setRequestProperty("accept", "application/json");
            httpConn.setRequestProperty("Authorization", "ApiKey <API_KEY_SECRET>");
            httpConn.setRequestProperty("Content-Type", "multipart/form-data");

            InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                    ? httpConn.getInputStream()
                    : httpConn.getErrorStream();
            // getErrorStream() returns null when the server sent no error body.
            if (responseStream == null) {
                System.out.println("");
                return;
            }
            // Closing the scanner also closes the underlying response stream.
            try (Scanner s = new Scanner(responseStream, "UTF-8")) {
                // "\\A" matches start of input, so the whole body is one token.
                s.useDelimiter("\\A");
                String response = s.hasNext() ? s.next() : "";
                System.out.println(response);
            }
        } finally {
            httpConn.disconnect();
        }
    }
}
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
/**
 * Sample client: issues a POST request to the /documents/ endpoint,
 * authenticated with a bearer access token, and prints the response body.
 */
class Main {
    /**
     * Sends the request and prints the response (or error) body to stdout.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): URL, method and Content-Type are the same as in the
        // upload request; reading a document's status presumably needs a GET
        // on a uuid-specific URL -- confirm against the API reference.
        URL url = new URL("https://api.natif.ai/documents/");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        try {
            httpConn.setRequestMethod("POST");
            httpConn.setRequestProperty("accept", "application/json");
            httpConn.setRequestProperty("Authorization", "Bearer <access_token>");
            httpConn.setRequestProperty("Content-Type", "multipart/form-data");

            InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                    ? httpConn.getInputStream()
                    : httpConn.getErrorStream();
            // getErrorStream() returns null when the server sent no error body.
            if (responseStream == null) {
                System.out.println("");
                return;
            }
            // Closing the scanner also closes the underlying response stream.
            try (Scanner s = new Scanner(responseStream, "UTF-8")) {
                // "\\A" matches start of input, so the whole body is one token.
                s.useDelimiter("\\A");
                String response = s.hasNext() ? s.next() : "";
                System.out.println(response);
            }
        } finally {
            httpConn.disconnect();
        }
    }
}
Fetching results#
If the processing_status is success, you can fetch different information about the document, depending on
how you uploaded it:
- You can directly fetch the OCR results in different formats
- Depending on the inferred or chosen document type you can fetch the corresponding document extractions.
- You can even fetch the PDF document enriched by different information as an invisible OCR layer.
- During processing some corrections like orientation and skew are performed. You can download the page images as JPGs.
OCR results#
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
/**
 * Sample client: issues a POST request to the /documents/ endpoint,
 * authenticated with an API key, and prints the response body.
 */
class Main {
    /**
     * Sends the request and prints the response (or error) body to stdout.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): URL, method and Content-Type are the same as in the
        // upload request; fetching OCR results presumably needs a GET on a
        // uuid-specific URL -- confirm against the API reference.
        URL url = new URL("https://api.natif.ai/documents/");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        try {
            httpConn.setRequestMethod("POST");
            httpConn.setRequestProperty("accept", "application/json");
            httpConn.setRequestProperty("Authorization", "ApiKey <API_KEY_SECRET>");
            httpConn.setRequestProperty("Content-Type", "multipart/form-data");

            InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                    ? httpConn.getInputStream()
                    : httpConn.getErrorStream();
            // getErrorStream() returns null when the server sent no error body.
            if (responseStream == null) {
                System.out.println("");
                return;
            }
            // Closing the scanner also closes the underlying response stream.
            try (Scanner s = new Scanner(responseStream, "UTF-8")) {
                // "\\A" matches start of input, so the whole body is one token.
                s.useDelimiter("\\A");
                String response = s.hasNext() ? s.next() : "";
                System.out.println(response);
            }
        } finally {
            httpConn.disconnect();
        }
    }
}
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
/**
 * Sample client: issues a POST request to the /documents/ endpoint,
 * authenticated with a bearer access token, and prints the response body.
 */
class Main {
    /**
     * Sends the request and prints the response (or error) body to stdout.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): URL, method and Content-Type are the same as in the
        // upload request; fetching OCR results presumably needs a GET on a
        // uuid-specific URL -- confirm against the API reference.
        URL url = new URL("https://api.natif.ai/documents/");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        try {
            httpConn.setRequestMethod("POST");
            httpConn.setRequestProperty("accept", "application/json");
            httpConn.setRequestProperty("Authorization", "Bearer <access_token>");
            httpConn.setRequestProperty("Content-Type", "multipart/form-data");

            InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                    ? httpConn.getInputStream()
                    : httpConn.getErrorStream();
            // getErrorStream() returns null when the server sent no error body.
            if (responseStream == null) {
                System.out.println("");
                return;
            }
            // Closing the scanner also closes the underlying response stream.
            try (Scanner s = new Scanner(responseStream, "UTF-8")) {
                // "\\A" matches start of input, so the whole body is one token.
                s.useDelimiter("\\A");
                String response = s.hasNext() ? s.next() : "";
                System.out.println(response);
            }
        } finally {
            httpConn.disconnect();
        }
    }
}
Extraction results#
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
/**
 * Sample client: issues a POST request to the /documents/ endpoint,
 * authenticated with an API key, and prints the response body.
 */
class Main {
    /**
     * Sends the request and prints the response (or error) body to stdout.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): URL, method and Content-Type are the same as in the
        // upload request; fetching extraction results presumably needs a GET
        // on a uuid-specific URL -- confirm against the API reference.
        URL url = new URL("https://api.natif.ai/documents/");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        try {
            httpConn.setRequestMethod("POST");
            httpConn.setRequestProperty("accept", "application/json");
            httpConn.setRequestProperty("Authorization", "ApiKey <API_KEY_SECRET>");
            httpConn.setRequestProperty("Content-Type", "multipart/form-data");

            InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                    ? httpConn.getInputStream()
                    : httpConn.getErrorStream();
            // getErrorStream() returns null when the server sent no error body.
            if (responseStream == null) {
                System.out.println("");
                return;
            }
            // Closing the scanner also closes the underlying response stream.
            try (Scanner s = new Scanner(responseStream, "UTF-8")) {
                // "\\A" matches start of input, so the whole body is one token.
                s.useDelimiter("\\A");
                String response = s.hasNext() ? s.next() : "";
                System.out.println(response);
            }
        } finally {
            httpConn.disconnect();
        }
    }
}
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
/**
 * Sample client: issues a POST request to the /documents/ endpoint,
 * authenticated with a bearer access token, and prints the response body.
 */
class Main {
    /**
     * Sends the request and prints the response (or error) body to stdout.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): URL, method and Content-Type are the same as in the
        // upload request; fetching extraction results presumably needs a GET
        // on a uuid-specific URL -- confirm against the API reference.
        URL url = new URL("https://api.natif.ai/documents/");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        try {
            httpConn.setRequestMethod("POST");
            httpConn.setRequestProperty("accept", "application/json");
            httpConn.setRequestProperty("Authorization", "Bearer <access_token>");
            httpConn.setRequestProperty("Content-Type", "multipart/form-data");

            InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                    ? httpConn.getInputStream()
                    : httpConn.getErrorStream();
            // getErrorStream() returns null when the server sent no error body.
            if (responseStream == null) {
                System.out.println("");
                return;
            }
            // Closing the scanner also closes the underlying response stream.
            try (Scanner s = new Scanner(responseStream, "UTF-8")) {
                // "\\A" matches start of input, so the whole body is one token.
                s.useDelimiter("\\A");
                String response = s.hasNext() ? s.next() : "";
                System.out.println(response);
            }
        } finally {
            httpConn.disconnect();
        }
    }
}
PDFs#
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
/**
 * Sample client: issues a POST request to the /documents/ endpoint,
 * authenticated with an API key, and prints the response body.
 */
class Main {
    /**
     * Sends the request and prints the response (or error) body to stdout.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): URL, method and Content-Type are the same as in the
        // upload request; downloading a PDF presumably needs a GET on a
        // uuid-specific URL and a binary-safe read -- confirm against the
        // API reference.
        URL url = new URL("https://api.natif.ai/documents/");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        try {
            httpConn.setRequestMethod("POST");
            httpConn.setRequestProperty("accept", "application/json");
            httpConn.setRequestProperty("Authorization", "ApiKey <API_KEY_SECRET>");
            httpConn.setRequestProperty("Content-Type", "multipart/form-data");

            InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                    ? httpConn.getInputStream()
                    : httpConn.getErrorStream();
            // getErrorStream() returns null when the server sent no error body.
            if (responseStream == null) {
                System.out.println("");
                return;
            }
            // Closing the scanner also closes the underlying response stream.
            try (Scanner s = new Scanner(responseStream, "UTF-8")) {
                // "\\A" matches start of input, so the whole body is one token.
                s.useDelimiter("\\A");
                String response = s.hasNext() ? s.next() : "";
                System.out.println(response);
            }
        } finally {
            httpConn.disconnect();
        }
    }
}
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;
/**
 * Sample client: issues a POST request to the /documents/ endpoint,
 * authenticated with a bearer access token, and prints the response body.
 */
class Main {
    /**
     * Sends the request and prints the response (or error) body to stdout.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): URL, method and Content-Type are the same as in the
        // upload request; downloading a PDF presumably needs a GET on a
        // uuid-specific URL and a binary-safe read -- confirm against the
        // API reference.
        URL url = new URL("https://api.natif.ai/documents/");
        HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
        try {
            httpConn.setRequestMethod("POST");
            httpConn.setRequestProperty("accept", "application/json");
            httpConn.setRequestProperty("Authorization", "Bearer <access_token>");
            httpConn.setRequestProperty("Content-Type", "multipart/form-data");

            InputStream responseStream = httpConn.getResponseCode() / 100 == 2
                    ? httpConn.getInputStream()
                    : httpConn.getErrorStream();
            // getErrorStream() returns null when the server sent no error body.
            if (responseStream == null) {
                System.out.println("");
                return;
            }
            // Closing the scanner also closes the underlying response stream.
            try (Scanner s = new Scanner(responseStream, "UTF-8")) {
                // "\\A" matches start of input, so the whole body is one token.
                s.useDelimiter("\\A");
                String response = s.hasNext() ? s.next() : "";
                System.out.println(response);
            }
        } finally {
            httpConn.disconnect();
        }
    }
}