Вопрос:

Мне нужно спарсить сценаристов с imdb.de.

Актёров я беру так:

Код:

...
            // Links to the individual actor pages inside the cast table of "fullcredits".
            'actors_links' => 'table.cast tr td.nm a',
            // On an actor page each value sits in the div.info-content that directly
            // follows the <h5> carrying the German label.
            'actor_birthday' => 'h5:contains("Geburtstag") + div.info-content',
            'actor_death' => 'h5:contains("Todestag") + div.info-content',
            // "Größe" (height); the label was mis-encoded as "GrцЯe" and could never match.
            'actor_height' => 'h5:contains("Größe") + div.info-content',
            'actor_nickname' => 'h5:contains("Spitzname") + div.info-content'
...
/**
 * Downloads the film's "fullcredits" page and collects data for every cast entry.
 *
 * Requests BASE_PARSE_URL . $this->link . 'fullcredits', selects the cast links
 * with $this->path['actors_links'], calls get_actor_info() for each link and
 * stores the resulting list in $this->film['actors'].
 *
 * The download is retried a bounded number of times. If every attempt fails
 * (or the response body is empty) the method returns without modifying
 * $this->film, instead of looping forever on a permanent error such as an
 * HTTP 404 reported through CURLOPT_FAILONERROR.
 *
 * @return void
 */
private function get_actors(){
        // Upper bound for download attempts; prevents an endless loop on permanent failures.
        $max_attempts = 5;

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_USERAGENT, USER_AGENT);
        curl_setopt($ch, CURLOPT_URL, BASE_PARSE_URL.$this->link.'fullcredits');
        // Treat HTTP status codes >= 400 as cURL errors so they trigger a retry.
        curl_setopt($ch, CURLOPT_FAILONERROR, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_AUTOREFERER, true);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 3);

        $html = false;
        for ($attempt = 0; $attempt < $max_attempts; $attempt++) {
//            curl_setopt($ch, CURLOPT_PROXY, $this->get_random_proxy());
            $html = curl_exec($ch);
            if (curl_errno($ch) == 0) {
                break;
            }
            // Discard partial output of a failed attempt.
            $html = false;
        }
        curl_close($ch);

        if (!$html) {
            return;
        }

        $doc = phpQuery::newDocumentHTML($html);

        $actors = array();
        foreach ($doc[$this->path['actors_links']] as $actors_link) {
            //TODO: add a check so that an already parsed actor is not parsed again
            $actors[] = $this->get_actor_info(array(
                'link' => $actors_link->getAttribute('href'),
                'name' => $actors_link->nodeValue
            ));
        }
        $this->film['actors'] = $actors;
    }

    /**
     * Downloads a single actor page and extracts the actor's details.
     *
     * The result always contains 'link', 'id' and 'name'; 'birthday', 'death',
     * 'height' and 'nickname' are added only when the matching element on the
     * page has non-empty text.
     *
     * NOTE(review): the retry loop below has no upper bound. With
     * CURLOPT_FAILONERROR enabled, a permanent HTTP error keeps curl_errno()
     * non-zero, so the loop would never terminate.
     *
     * NOTE(review): when the downloaded body is empty, no value is returned in
     * the code visible here.
     *
     * @param array $link Array with the keys 'link' (path appended to BASE_PARSE_URL)
     *                    and 'name' (actor name taken from the cast list).
     * @return array Actor data as described above.
     */
    private function get_actor_info($link){
        // Remove the script execution time limit for this request.
        set_time_limit(0);
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_USERAGENT, USER_AGENT);
        curl_setopt($ch, CURLOPT_URL, BASE_PARSE_URL.$link['link']);
        curl_setopt($ch, CURLOPT_FAILONERROR, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_AUTOREFERER, true);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER,true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 3);

        // Repeat the request until cURL reports no error.
        do{
//            curl_setopt($ch, CURLOPT_PROXY, $this->get_random_proxy());
            $html = curl_exec($ch);
            $error = curl_errno($ch);
        }
        while($error!=0);
        curl_close($ch);

        if($html){
            $actor_doc = phpQuery::newDocumentHTML($html);

            $actor = array();

            $actor['link'] = $link['link'];
            // The id is every digit found in the link, concatenated.
            $actor['id'] = preg_replace('/[^0-9]/s', '', $link['link']);

            $actor['name'] = $link['name'];
            $birthday = pq($this->path['actor_birthday']);
            if($birthday->text()!=''){
                // Drop bracketed text, then split into comma-separated parts:
                // the date comes first, followed by the place components.
                $birthday = preg_replace('/\[(.*)?]/s', '', $birthday->text());
                $birthday = explode(',', $birthday);
                // Determine the place of birth, depending on which parts are present.
                if(count($birthday)>3){
                    $data = array(
                        'country' => trim($birthday[3]),
                        'state' => trim($birthday[2]),
                        'city' => trim($birthday[1]),
                        'birthday_date' => $birthday[0]
                    );
                } elseif(count($birthday)==3) {
                    $data = array(
                        'country' => trim($birthday[2]),
                        'city' => trim($birthday[1]),
                        'birthday_date' => $birthday[0]
                    );
                } elseif(count($birthday)==2){
                    $data = array(
                        'country' => trim($birthday[1]),
                        'birthday_date' => $birthday[0]
                    );
                } elseif(count($birthday)==1){
                    $data = array(
                        'birthday_date' => $birthday[0]
                    );
                }
                // Parse the date of birth into a "year-month-day" string.
                $tmpDate = explode(' ', $data['birthday_date']);
                foreach($tmpDate as $key=>$value){
                    $tmpDate[$key] = trim($value);
                }
                if(count ($tmpDate) == 3){
                    // day-month-year; the dot is stripped from the day part
                    // (get_month() is defined elsewhere; presumably it maps a month name to its number - TODO confirm)
                    $tmpDate[0] = str_replace('.', '', $tmpDate[0]);
                    $data['birthday_date'] = $tmpDate[2].'-'.get_month($tmpDate[1]).'-'.$tmpDate[0];
                } elseif(count ($tmpDate) == 2) {
                    // month-year; the day is filled with "00"
                    $data['birthday_date'] = $tmpDate[1].'-'.get_month($tmpDate[0]).'-00';
                } elseif(count ($tmpDate) == 1) {
                    // year only; month and day are filled with "00"
                    $data['birthday_date'] = $tmpDate[0].'-00-00';
                }
                $actor['birthday'] = $data;
            }

            $death = pq($this->path['actor_death']);
            if($death->text()!=''){
                // Date of death: everything from the first comma onwards is discarded.
                $date = trim(preg_replace('/\,(.*)/s', '', $death->text()));
                $tmpDate = explode(' ', $date);
                foreach($tmpDate as $key=>$value){
                    $tmpDate[$key] = trim($value);
                }
                if(count ($tmpDate) == 3){
                    // day-month-year
                    $tmpDate[0] = str_replace('.', '', $tmpDate[0]);
                    $actor['death']  = $tmpDate[2].'-'.get_month($tmpDate[1]).'-'.$tmpDate[0];
                } elseif(count ($tmpDate) == 2) {
                    // month-year
                    $actor['death'] = $tmpDate[1].'-'.get_month($tmpDate[0]).'-00';
                } elseif(count ($tmpDate) == 1) {
                    // year only
                    $actor['death'] = $tmpDate[0].'-00-00';
                }

            }
            
            $height = pq($this->path['actor_height']);
            if($height->text()!=''){
                // Keep only digits and commas from the height text.
                $actor['height'] = trim(preg_replace('/[^0-9,]/s', '', $height->text()));
            }

            $nickname = pq($this->path['actor_nickname']);
            if($nickname->text()!=''){
                $actor['nickname'] = trim($nickname->text());
            }

            return $actor;
        }
Но они (сценаристы) находятся на странице fullcredits.

Как мне спарсить сценаристов?